diff --git "a/graph.pbtxt" "b/graph.pbtxt" new file mode 100644--- /dev/null +++ "b/graph.pbtxt" @@ -0,0 +1,264150 @@ +node { + name: "global_step/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@global_step" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } +} +node { + name: "global_step" + op: "VarHandleOp" + attr { + key: "_class" + value { + list { + s: "loc:@global_step" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + } + } + } + attr { + key: "shared_name" + value { + s: "global_step" + } + } +} +node { + name: "global_step/IsInitialized/VarIsInitializedOp" + op: "VarIsInitializedOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/Assign" + op: "AssignVariableOp" + input: "global_step" + input: "global_step/Initializer/zeros" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "global_step/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "global_step/VarIsInitializedOp" + op: "VarIsInitializedOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/cond/Switch" + op: "Switch" + input: "global_step/VarIsInitializedOp" + input: "global_step/VarIsInitializedOp" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + shape { + } + } + } + } +} +node { + name: "global_step/cond/switch_t" + op: "Identity" + input: "global_step/cond/Switch:1" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/cond/switch_f" + op: "Identity" + input: "global_step/cond/Switch" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/cond/pred_id" + op: "Identity" + input: "global_step/VarIsInitializedOp" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/cond/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "global_step/cond/Read/ReadVariableOp/Switch:1" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "global_step/cond/Read/ReadVariableOp/Switch" + op: "Switch" + input: "global_step" + input: "global_step/cond/pred_id" + attr { + key: "T" + value { + type: DT_RESOURCE + } + } + attr { + key: "_class" + value { + list { + s: "loc:@global_step" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + shape { + } + } + } + } +} +node { + name: "global_step/cond/Identity" + op: "Identity" + input: "global_step/cond/Read/ReadVariableOp" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_step/cond/Switch_1" + op: "Switch" + input: "global_step/Initializer/zeros" + input: "global_step/cond/pred_id" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@global_step" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + shape { + } + } + } + } +} +node { + name: "global_step/cond/Merge" + op: "Merge" + input: "global_step/cond/Switch_1" + input: "global_step/cond/Identity" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + shape { + } + } + } + } +} +node { + name: "global_step/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } +} +node { + name: "global_step/add" + op: "AddV2" + input: "global_step/cond/Merge" + input: "global_step/add/y" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "/Data/albert/Albert.tfrecord" + } + } + } +} +node { + name: "TensorSliceDataset" + op: "TensorSliceDataset" + input: "Const" + device: "/device:CPU:0" + attr { + key: "Toutput_types" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "count" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: -1 + } + } + } +} +node { + name: "RepeatDataset" + op: "RepeatDataset" + input: "TensorSliceDataset" + input: "count" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } +} +node { + name: "buffer_size" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 1 + } + } + } +} +node { + name: "seed" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } +} +node { + name: "seed2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } +} +node { + name: "ShuffleDataset" + op: "ShuffleDataset" + input: "RepeatDataset" + input: "buffer_size" + input: "seed" + input: "seed2" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "reshuffle_each_iteration" + value { + b: true + } + } +} +node { + name: "cycle_length" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 1 + } + } + } +} +node { + name: "block_length" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 1 + } + } + } +} +node { + name: "sloppy" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_BOOL + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_BOOL + tensor_shape { + } + bool_val: true + } + } + } +} +node { + name: "buffer_output_elements" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 2 + } + } + } +} +node { + name: "prefetch_input_elements" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 2 + } + } + } +} +node { + name: "ParallelInterleaveDataset" + op: "ParallelInterleaveDataset" + input: "ShuffleDataset" + input: "cycle_length" + input: "block_length" + input: "sloppy" + input: "buffer_output_elements" + input: "prefetch_input_elements" + device: "/device:CPU:0" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "f" + value { + func { + name: "__inference_tf_data_experimental_parallel_interleave__43" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } +} +node { + name: "buffer_size_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 100 + } + } + } +} +node { + name: "seed_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } +} +node { + name: "seed2_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } +} +node { + name: "ShuffleDataset_1" + op: "ShuffleDataset" + input: "ParallelInterleaveDataset" + input: "buffer_size_1" + input: "seed_1" + input: "seed2_1" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "reshuffle_each_iteration" + value { + b: true + } + } +} +node { + name: "batch_size" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 16 + } + } + } +} +node { + name: "num_parallel_calls" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 64 + } + } + } +} +node { + name: "drop_remainder" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_BOOL + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_BOOL + tensor_shape { + } + bool_val: true + } + } + } +} +node { + name: "MapAndBatchDataset" + op: "MapAndBatchDataset" + input: "ShuffleDataset_1" + input: "batch_size" + input: "num_parallel_calls" + input: "drop_remainder" + device: "/device:CPU:0" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "f" + value { + func { + name: "tf_data_experimental_map_and_batch__54" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 1 + } + } + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_FLOAT + type: DT_INT32 + type: DT_INT32 + } + } + } + attr { + key: "preserve_cardinality" + value { + b: true + } + } +} +node { + name: "optimizations" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 3 + } + } + string_val: "map_and_batch_fusion" + string_val: "noop_elimination" + string_val: "shuffle_and_repeat_fusion" + } + } + } +} +node { + name: "OptimizeDataset" + op: "OptimizeDataset" + input: "MapAndBatchDataset" + input: "optimizations" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "optimization_configs" + value { + list { + s: "map_vectorization:use_choose_fastest:false" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 1 + } + } + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_FLOAT + type: DT_INT32 + type: DT_INT32 + } + } + } +} +node { + name: "ModelDataset" + op: "ModelDataset" + input: "OptimizeDataset" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "algorithm" + value { + i: 0 + } + } + attr { + key: "cpu_budget" + value { + i: 0 + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 1 + } + } + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_FLOAT + type: DT_INT32 + type: DT_INT32 + } + } + } +} +node { + name: "IteratorV2" + op: "IteratorV2" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 1 + } + } + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_FLOAT + type: DT_INT32 + type: DT_INT32 + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "MakeIterator" + op: "MakeIterator" + input: "ModelDataset" + input: "IteratorV2" + attr { + key: "_class" + value { + list { + s: "loc:@IteratorV2" + } + } + } +} +node { + name: "IteratorToStringHandle" + op: "IteratorToStringHandle" + input: "IteratorV2" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "IteratorGetNext" + op: "IteratorGetNext" + input: "IteratorV2" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 1 + } + } + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + shape { + dim { + size: 16 + } + dim { + size: 1 + } + } + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_INT32 + type: DT_FLOAT + type: DT_INT32 + type: DT_INT32 + } + } + } +} +node { + name: "bert/embeddings/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "bert/embeddings/ExpandDims" + op: "ExpandDims" + input: "IteratorGetNext" + input: "bert/embeddings/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\326\234\001\000\200\000\000\000" + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/TruncatedNormal" + input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/Initializer/truncated_normal" + op: "Add" + input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/mul" + input: "bert/embeddings/word_embeddings/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/word_embeddings" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/word_embeddings/Assign" + op: "Assign" + input: "bert/embeddings/word_embeddings" + input: "bert/embeddings/word_embeddings/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/word_embeddings/read" + op: "Identity" + input: "bert/embeddings/word_embeddings" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/embedding_lookup/axis" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "bert/embeddings/embedding_lookup" + op: "GatherV2" + input: "bert/embeddings/word_embeddings/read" + input: "bert/embeddings/ExpandDims" + input: "bert/embeddings/embedding_lookup/axis" + attr { + key: "Taxis" + value { + type: DT_INT32 + } + } + attr { + key: "Tindices" + value { + type: DT_INT32 + } + } + attr { + key: "Tparams" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "batch_dims" + value { + i: 0 + } + } +} +node { + name: "bert/embeddings/embedding_lookup/Identity" + op: "Identity" + input: "bert/embeddings/embedding_lookup" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/embeddings/Reshape" + op: "Reshape" + input: "bert/embeddings/embedding_lookup/Identity" + input: "bert/embeddings/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\002\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/TruncatedNormal" + input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal" + op: "Add" + input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/mul" + input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/Assign" + op: "Assign" + input: "bert/embeddings/token_type_embeddings" + input: "bert/embeddings/token_type_embeddings/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/read" + op: "Identity" + input: "bert/embeddings/token_type_embeddings" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/embedding_lookup_1/axis" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "bert/embeddings/embedding_lookup_1" + op: "GatherV2" + input: "bert/embeddings/token_type_embeddings/read" + input: "IteratorGetNext:6" + input: "bert/embeddings/embedding_lookup_1/axis" + attr { + key: "Taxis" + value { + type: DT_INT32 + } + } + attr { + key: "Tindices" + value { + type: DT_INT32 + } + } + attr { + key: "Tparams" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "batch_dims" + value { + i: 0 + } + } +} +node { + name: "bert/embeddings/embedding_lookup_1/Identity" + op: "Identity" + input: "bert/embeddings/embedding_lookup_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/add" + op: "AddV2" + input: "bert/embeddings/Reshape" + input: "bert/embeddings/embedding_lookup_1/Identity" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 64 + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 512 + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/LessEqual" + op: "LessEqual" + input: "bert/embeddings/assert_less_equal/x" + input: "bert/embeddings/assert_less_equal/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/All" + op: "All" + input: "bert/embeddings/assert_less_equal/LessEqual" + input: "bert/embeddings/assert_less_equal/Const" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Condition x <= y did not hold element-wise:" + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Const_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "x (bert/embeddings/assert_less_equal/x:0) = " + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Const_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "y (bert/embeddings/assert_less_equal/y:0) = " + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Assert/data_0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "Condition x <= y did not hold element-wise:" + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Assert/data_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "x (bert/embeddings/assert_less_equal/x:0) = " + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Assert/data_3" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "y (bert/embeddings/assert_less_equal/y:0) = " + } + } + } +} +node { + name: "bert/embeddings/assert_less_equal/Assert/Assert" + op: "Assert" + input: "bert/embeddings/assert_less_equal/All" + input: "bert/embeddings/assert_less_equal/Assert/Assert/data_0" + input: "bert/embeddings/assert_less_equal/Assert/Assert/data_1" + input: "bert/embeddings/assert_less_equal/x" + input: "bert/embeddings/assert_less_equal/Assert/Assert/data_3" + input: "bert/embeddings/assert_less_equal/y" + attr { + key: "T" + value { + list { + type: DT_STRING + type: DT_STRING + type: DT_INT32 + type: DT_STRING + type: DT_INT32 + } + } + } + attr { + key: "summarize" + value { + i: 3 + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\002\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/TruncatedNormal" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/Initializer/truncated_normal" + op: "Add" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/mul" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/position_embeddings" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/position_embeddings/Assign" + op: "Assign" + input: "bert/embeddings/position_embeddings" + input: "bert/embeddings/position_embeddings/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/position_embeddings/read" + op: "Identity" + input: "bert/embeddings/position_embeddings" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/Slice/begin" + op: "Const" + input: "^bert/embeddings/assert_less_equal/Assert/Assert" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\000\000\000\000" + } + } + } +} +node { + name: "bert/embeddings/Slice/size" + op: "Const" + input: "^bert/embeddings/assert_less_equal/Assert/Assert" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\000\000\000\377\377\377\377" + } + } + } +} +node { + name: "bert/embeddings/Slice" + op: "Slice" + input: "bert/embeddings/position_embeddings/read" + input: "bert/embeddings/Slice/begin" + input: "bert/embeddings/Slice/size" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/Reshape_1/shape" + op: "Const" + input: "^bert/embeddings/assert_less_equal/Assert/Assert" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000@\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/embeddings/Reshape_1" + op: "Reshape" + input: "bert/embeddings/Slice" + input: "bert/embeddings/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/add_1" + op: "AddV2" + input: "bert/embeddings/add" + input: "bert/embeddings/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 128 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta" + input: "bert/embeddings/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 128 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma" + input: "bert/embeddings/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/mean" + op: "Mean" + input: "bert/embeddings/add_1" + input: "bert/embeddings/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/embeddings/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/embeddings/add_1" + input: "bert/embeddings/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/moments/variance" + op: "Mean" + input: "bert/embeddings/LayerNorm/moments/SquaredDifference" + input: "bert/embeddings/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/add" + op: "AddV2" + input: "bert/embeddings/LayerNorm/moments/variance" + input: "bert/embeddings/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/embeddings/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/embeddings/LayerNorm/batchnorm/Rsqrt" + input: "bert/embeddings/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/embeddings/add_1" + input: "bert/embeddings/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/embeddings/LayerNorm/moments/mean" + input: "bert/embeddings/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/embeddings/LayerNorm/beta/read" + input: "bert/embeddings/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/batchnorm/add_1" + op: "AddV2" + input: "bert/embeddings/LayerNorm/batchnorm/mul_1" + input: "bert/embeddings/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/Assign" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/kernel" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/read" + op: "Identity" + input: "bert/encoder/embedding_hidden_mapping_in/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/bias/Assign" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/bias" + input: "bert/encoder/embedding_hidden_mapping_in/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/bias/read" + op: "Identity" + input: "bert/encoder/embedding_hidden_mapping_in/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/einsum/Reshape" + op: "Reshape" + input: "bert/embeddings/LayerNorm/batchnorm/add_1" + input: "bert/encoder/embedding_hidden_mapping_in/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/embedding_hidden_mapping_in/einsum/Reshape" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/embedding_hidden_mapping_in/einsum/MatMul" + input: "bert/encoder/embedding_hidden_mapping_in/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/add" + op: "AddV2" + input: "bert/encoder/embedding_hidden_mapping_in/einsum/Reshape_1" + input: "bert/encoder/embedding_hidden_mapping_in/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/read" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/read" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/embedding_hidden_mapping_in/add" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose_1" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/read" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/read" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/embedding_hidden_mapping_in/add" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose_1" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/read" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/read" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/embedding_hidden_mapping_in/add" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose_1" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/add" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/add" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_2" + op: "Transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/add" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Reshape" + op: "Reshape" + input: "IteratorGetNext:1" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Mul" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/ones/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/ones/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/ones" + op: "Fill" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/ones/shape_as_tensor" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/ones/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Cast" + op: "Cast" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Reshape" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/ones" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/sub/x" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/sub" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/attention_probs" + op: "Softmax" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/MatMul_2" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/attention_probs" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_3" + op: "Transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/MatMul_2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\014\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/read" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_3" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\000\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose_1" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/add" + input: "bert/encoder/embedding_hidden_mapping_in/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/add" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/add" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/add" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/read" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/Initializer/zeros" + op: "Fill" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/add_1" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul/x" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Tanh" + op: "Tanh" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/add_1/x" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_3" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_3" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/add" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/read" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/read" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/read" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/read" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/read" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/read" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/read" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/add" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/add" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_2" + op: "Transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/add" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Reshape" + op: "Reshape" + input: "IteratorGetNext:1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/ones/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/ones/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/ones" + op: "Fill" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/ones/shape_as_tensor" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/ones/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Cast" + op: "Cast" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Reshape" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/ones" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/sub/x" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/sub" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/attention_probs" + op: "Softmax" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/MatMul_2" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/attention_probs" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_3" + op: "Transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/MatMul_2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\014\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/read" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_3" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\000\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/add" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/add" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/add" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/add" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/read" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul/x" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Tanh" + op: "Tanh" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/add_1/x" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_3" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_3" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/add" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/read" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/read" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/read" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/read" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/read" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/read" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/read" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/add" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/add" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_2" + op: "Transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/add" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Reshape" + op: "Reshape" + input: "IteratorGetNext:1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/ones/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/ones/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/ones" + op: "Fill" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/ones/shape_as_tensor" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/ones/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Cast" + op: "Cast" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Reshape" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/ones" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/sub/x" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/sub" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/attention_probs" + op: "Softmax" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/MatMul_2" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/attention_probs" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_3" + op: "Transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/MatMul_2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\014\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/read" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_3" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\000\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/add" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/add" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/add" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/add" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/read" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul/x" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Tanh" + op: "Tanh" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/add_1/x" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_3" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_3" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/add" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/read" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/read" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/read" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/read" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/read" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/read" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/read" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/add" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/add" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_2" + op: "Transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/add" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Reshape" + op: "Reshape" + input: "IteratorGetNext:1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/ones/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/ones/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/ones" + op: "Fill" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/ones/shape_as_tensor" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/ones/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Cast" + op: "Cast" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Reshape" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/ones" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/sub/x" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/sub" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/attention_probs" + op: "Softmax" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/MatMul_2" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/attention_probs" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_3" + op: "Transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/MatMul_2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\014\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/read" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_3" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\000\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/add" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/add" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/add" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/add" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/read" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul/x" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Tanh" + op: "Tanh" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/add_1/x" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_3" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_3" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/add" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/read" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/read" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/read" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/read" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/read" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/read" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/read" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/add" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/add" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_2" + op: "Transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/add" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Reshape" + op: "Reshape" + input: "IteratorGetNext:1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/ones/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/ones/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/ones" + op: "Fill" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/ones/shape_as_tensor" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/ones/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Cast" + op: "Cast" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Reshape" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/ones" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/sub/x" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/sub" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/attention_probs" + op: "Softmax" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/MatMul_2" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/attention_probs" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_3" + op: "Transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/MatMul_2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\014\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/read" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_3" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\000\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/add" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/add" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/add" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/add" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/read" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul/x" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Tanh" + op: "Tanh" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/add_1/x" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_3" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_3" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/add" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/read" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/read" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/read" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/read" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/read" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/read" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/read" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/add" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/add" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_2" + op: "Transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/add" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Reshape" + op: "Reshape" + input: "IteratorGetNext:1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/ones/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/ones/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/ones" + op: "Fill" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/ones/shape_as_tensor" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/ones/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Cast" + op: "Cast" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Reshape" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/ones" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/sub/x" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/sub" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/attention_probs" + op: "Softmax" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/MatMul_2" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/attention_probs" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_3" + op: "Transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/MatMul_2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\014\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/read" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_3" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\000\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/add" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/add" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/add" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/add" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/read" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul/x" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Tanh" + op: "Tanh" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/add_1/x" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_3" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_3" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/add" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/read" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/read" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/read" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/read" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/read" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/read" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/read" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/add" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/add" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_2" + op: "Transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/add" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Reshape" + op: "Reshape" + input: "IteratorGetNext:1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/ones/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/ones/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/ones" + op: "Fill" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/ones/shape_as_tensor" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/ones/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Cast" + op: "Cast" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Reshape" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/ones" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/sub/x" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/sub" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/attention_probs" + op: "Softmax" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/MatMul_2" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/attention_probs" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_3" + op: "Transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/MatMul_2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\014\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/read" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_3" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\000\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/add" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/add" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/add" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/add" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/read" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul/x" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Tanh" + op: "Tanh" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/add_1/x" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_3" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_3" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/add" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/read" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/read" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/read" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/read" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/read" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/read" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/read" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/add" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/add" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_2" + op: "Transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/add" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Reshape" + op: "Reshape" + input: "IteratorGetNext:1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/ones/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/ones/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/ones" + op: "Fill" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/ones/shape_as_tensor" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/ones/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Cast" + op: "Cast" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Reshape" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/ones" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/sub/x" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/sub" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/attention_probs" + op: "Softmax" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/MatMul_2" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/attention_probs" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_3" + op: "Transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/MatMul_2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\014\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/read" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_3" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\000\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/add" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/add" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/add" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/add" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/read" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul/x" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Tanh" + op: "Tanh" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/add_1/x" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_3" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_3" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/add" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/read" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/read" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/read" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/read" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/read" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/read" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/read" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/add" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/add" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_2" + op: "Transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/add" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Reshape" + op: "Reshape" + input: "IteratorGetNext:1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/ones/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/ones/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/ones" + op: "Fill" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/ones/shape_as_tensor" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/ones/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Cast" + op: "Cast" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Reshape" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/ones" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/sub/x" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/sub" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/attention_probs" + op: "Softmax" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/MatMul_2" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/attention_probs" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_3" + op: "Transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/MatMul_2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\014\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/read" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_3" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\000\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/add" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/add" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/add" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/add" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/read" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul/x" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Tanh" + op: "Tanh" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/add_1/x" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_3" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_3" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/add" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/read" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/read" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/read" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/read" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/read" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/read" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/read" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/add" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/add" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_2" + op: "Transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/add" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Reshape" + op: "Reshape" + input: "IteratorGetNext:1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/ones/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/ones/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/ones" + op: "Fill" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/ones/shape_as_tensor" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/ones/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Cast" + op: "Cast" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Reshape" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/ones" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/sub/x" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/sub" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/attention_probs" + op: "Softmax" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/MatMul_2" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/attention_probs" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_3" + op: "Transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/MatMul_2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\014\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/read" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_3" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\000\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/add" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/add" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/add" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/add" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/read" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul/x" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Tanh" + op: "Tanh" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/add_1/x" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_3" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_3" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/add" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/read" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/read" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/read" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/read" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/read" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/read" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/read" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/add" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/add" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_2" + op: "Transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/add" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Reshape" + op: "Reshape" + input: "IteratorGetNext:1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/ones/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/ones/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/ones" + op: "Fill" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/ones/shape_as_tensor" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/ones/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Cast" + op: "Cast" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Reshape" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/ones" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/sub/x" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/sub" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/attention_probs" + op: "Softmax" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/MatMul_2" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/attention_probs" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_3" + op: "Transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/MatMul_2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\014\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/read" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_3" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\000\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/add" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/add" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/add" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/add" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/read" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul/x" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Tanh" + op: "Tanh" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/add_1/x" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_3" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_3" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/add" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/read" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/read" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/read" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/read" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/read" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/read" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/read" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/add" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/add" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_2/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_2" + op: "Transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/add" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Reshape" + op: "Reshape" + input: "IteratorGetNext:1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/MatMul" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.125 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/ones/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/ones/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/ones" + op: "Fill" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/ones/shape_as_tensor" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/ones/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Cast" + op: "Cast" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Reshape" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/ones" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Cast" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/sub/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/sub/x" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/mul_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: -10000.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/sub" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/mul_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/attention_probs" + op: "Softmax" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/MatMul_2" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/attention_probs" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_3/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\002\000\000\000\001\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_3" + op: "Transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/MatMul_2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\014\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/read" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\003\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose" + op: "Transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_3" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\000\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose_1" + op: "Transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_2" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/add" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/add" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/add" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/add" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/read" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/add_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul/x" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Tanh" + op: "Tanh" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/add_1/x" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_3" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + op: "Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_3" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + op: "MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + op: "Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean" + op: "Mean" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/StopGradient" + op: "StopGradient" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference" + op: "SquaredDifference" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance" + op: "Mean" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add" + op: "AddV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + op: "Rsqrt" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/read" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_1" + op: "AddV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\000\000\000\000\000\000\000\000" + } + } + } +} +node { + name: "bert/pooler/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\000\000\000\000" + } + } + } +} +node { + name: "bert/pooler/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "bert/pooler/strided_slice" + op: "StridedSlice" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "bert/pooler/strided_slice/stack" + input: "bert/pooler/strided_slice/stack_1" + input: "bert/pooler/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 5 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 5 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "bert/pooler/Squeeze" + op: "Squeeze" + input: "bert/pooler/strided_slice" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + i: 1 + } + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal/mul" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/kernel/Assign" + op: "Assign" + input: "bert/pooler/dense/kernel" + input: "bert/pooler/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/kernel/read" + op: "Identity" + input: "bert/pooler/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/bias/Assign" + op: "Assign" + input: "bert/pooler/dense/bias" + input: "bert/pooler/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/bias/read" + op: "Identity" + input: "bert/pooler/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/MatMul" + op: "MatMul" + input: "bert/pooler/Squeeze" + input: "bert/pooler/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "bert/pooler/dense/BiasAdd" + op: "BiasAdd" + input: "bert/pooler/dense/MatMul" + input: "bert/pooler/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "bert/pooler/dense/Tanh" + op: "Tanh" + input: "bert/pooler/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "range/start" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "range/limit" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 16 + } + } + } +} +node { + name: "range/delta" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "range" + op: "Range" + input: "range/start" + input: "range/limit" + input: "range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + } + } + } + } +} +node { + name: "mul/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 64 + } + } + } +} +node { + name: "mul" + op: "Mul" + input: "range" + input: "mul/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + } + } + } + } +} +node { + name: "Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\377\377\377\377\001\000\000\000" + } + } + } +} +node { + name: "Reshape" + op: "Reshape" + input: "mul" + input: "Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "add" + op: "AddV2" + input: "IteratorGetNext:3" + input: "Reshape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 20 + } + } + } + } + } +} +node { + name: "Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "Reshape_1" + op: "Reshape" + input: "add" + input: "Reshape_1/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + } + } + } + } +} +node { + name: "Reshape_2/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "Reshape_2" + op: "Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_1" + input: "Reshape_2/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "GatherV2/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "GatherV2" + op: "GatherV2" + input: "Reshape_2" + input: "Reshape_1" + input: "GatherV2/axis" + attr { + key: "Taxis" + value { + type: DT_INT32 + } + } + attr { + key: "Tindices" + value { + type: DT_INT32 + } + } + attr { + key: "Tparams" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "batch_dims" + value { + i: 0 + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\200\000\000\000" + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/mul" + op: "Mul" + input: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/TruncatedNormal" + input: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal" + op: "Add" + input: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/mul" + input: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/Assign" + op: "Assign" + input: "cls/predictions/transform/dense/kernel" + input: "cls/predictions/transform/dense/kernel/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/read" + op: "Identity" + input: "cls/predictions/transform/dense/kernel" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 128 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/dense/bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/dense/bias/Assign" + op: "Assign" + input: "cls/predictions/transform/dense/bias" + input: "cls/predictions/transform/dense/bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/dense/bias/read" + op: "Identity" + input: "cls/predictions/transform/dense/bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/MatMul" + op: "MatMul" + input: "GatherV2" + input: "cls/predictions/transform/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "cls/predictions/transform/dense/BiasAdd" + op: "BiasAdd" + input: "cls/predictions/transform/dense/MatMul" + input: "cls/predictions/transform/dense/bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "cls/predictions/transform/dense/Pow/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 3.0 + } + } + } +} +node { + name: "cls/predictions/transform/dense/Pow" + op: "Pow" + input: "cls/predictions/transform/dense/BiasAdd" + input: "cls/predictions/transform/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.044714998453855515 + } + } + } +} +node { + name: "cls/predictions/transform/dense/mul" + op: "Mul" + input: "cls/predictions/transform/dense/mul/x" + input: "cls/predictions/transform/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/add" + op: "AddV2" + input: "cls/predictions/transform/dense/BiasAdd" + input: "cls/predictions/transform/dense/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.7978845834732056 + } + } + } +} +node { + name: "cls/predictions/transform/dense/mul_1" + op: "Mul" + input: "cls/predictions/transform/dense/mul_1/x" + input: "cls/predictions/transform/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/Tanh" + op: "Tanh" + input: "cls/predictions/transform/dense/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/add_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "cls/predictions/transform/dense/add_1" + op: "AddV2" + input: "cls/predictions/transform/dense/add_1/x" + input: "cls/predictions/transform/dense/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/mul_2/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.5 + } + } + } +} +node { + name: "cls/predictions/transform/dense/mul_2" + op: "Mul" + input: "cls/predictions/transform/dense/mul_2/x" + input: "cls/predictions/transform/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/mul_3" + op: "Mul" + input: "cls/predictions/transform/dense/BiasAdd" + input: "cls/predictions/transform/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 128 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/Assign" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta" + input: "cls/predictions/transform/LayerNorm/beta/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/read" + op: "Identity" + input: "cls/predictions/transform/LayerNorm/beta" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/Initializer/ones" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 128 + } + } + float_val: 1.0 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/Assign" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma" + input: "cls/predictions/transform/LayerNorm/gamma/Initializer/ones" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/read" + op: "Identity" + input: "cls/predictions/transform/LayerNorm/gamma" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/moments/mean/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/moments/mean" + op: "Mean" + input: "cls/predictions/transform/dense/mul_3" + input: "cls/predictions/transform/LayerNorm/moments/mean/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/moments/StopGradient" + op: "StopGradient" + input: "cls/predictions/transform/LayerNorm/moments/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/moments/SquaredDifference" + op: "SquaredDifference" + input: "cls/predictions/transform/dense/mul_3" + input: "cls/predictions/transform/LayerNorm/moments/StopGradient" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/moments/variance/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/moments/variance" + op: "Mean" + input: "cls/predictions/transform/LayerNorm/moments/SquaredDifference" + input: "cls/predictions/transform/LayerNorm/moments/variance/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/batchnorm/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999960041972e-13 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/batchnorm/add" + op: "AddV2" + input: "cls/predictions/transform/LayerNorm/moments/variance" + input: "cls/predictions/transform/LayerNorm/batchnorm/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/batchnorm/Rsqrt" + op: "Rsqrt" + input: "cls/predictions/transform/LayerNorm/batchnorm/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/batchnorm/mul" + op: "Mul" + input: "cls/predictions/transform/LayerNorm/batchnorm/Rsqrt" + input: "cls/predictions/transform/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/batchnorm/mul_1" + op: "Mul" + input: "cls/predictions/transform/dense/mul_3" + input: "cls/predictions/transform/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/batchnorm/mul_2" + op: "Mul" + input: "cls/predictions/transform/LayerNorm/moments/mean" + input: "cls/predictions/transform/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/batchnorm/sub" + op: "Sub" + input: "cls/predictions/transform/LayerNorm/beta/read" + input: "cls/predictions/transform/LayerNorm/batchnorm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/batchnorm/add_1" + op: "AddV2" + input: "cls/predictions/transform/LayerNorm/batchnorm/mul_1" + input: "cls/predictions/transform/LayerNorm/batchnorm/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/output_bias/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 105686 + } + } + } +} +node { + name: "cls/predictions/output_bias/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/output_bias/Initializer/zeros" + op: "Fill" + input: "cls/predictions/output_bias/Initializer/zeros/shape_as_tensor" + input: "cls/predictions/output_bias/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "cls/predictions/output_bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 105686 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/output_bias/Assign" + op: "Assign" + input: "cls/predictions/output_bias" + input: "cls/predictions/output_bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/output_bias/read" + op: "Identity" + input: "cls/predictions/output_bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "cls/predictions/MatMul" + op: "MatMul" + input: "cls/predictions/transform/LayerNorm/batchnorm/add_1" + input: "bert/embeddings/word_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 105686 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "cls/predictions/BiasAdd" + op: "BiasAdd" + input: "cls/predictions/MatMul" + input: "cls/predictions/output_bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 105686 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "cls/predictions/LogSoftmax" + op: "LogSoftmax" + input: "cls/predictions/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "cls/predictions/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "cls/predictions/Reshape" + op: "Reshape" + input: "IteratorGetNext:2" + input: "cls/predictions/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + } + } + } + } +} +node { + name: "cls/predictions/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "cls/predictions/Reshape_1" + op: "Reshape" + input: "IteratorGetNext:4" + input: "cls/predictions/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + } + } + } + } +} +node { + name: "cls/predictions/one_hot/on_value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "cls/predictions/one_hot/off_value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/one_hot/depth" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 105686 + } + } + } +} +node { + name: "cls/predictions/one_hot" + op: "OneHot" + input: "cls/predictions/Reshape" + input: "cls/predictions/one_hot/depth" + input: "cls/predictions/one_hot/on_value" + input: "cls/predictions/one_hot/off_value" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "TI" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 105686 + } + } + } + } + } + attr { + key: "axis" + value { + i: -1 + } + } +} +node { + name: "cls/predictions/mul" + op: "Mul" + input: "cls/predictions/LogSoftmax" + input: "cls/predictions/one_hot" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "cls/predictions/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "cls/predictions/Sum" + op: "Sum" + input: "cls/predictions/mul" + input: "cls/predictions/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "cls/predictions/Neg" + op: "Neg" + input: "cls/predictions/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + } + } + } + } +} +node { + name: "cls/predictions/mul_1" + op: "Mul" + input: "cls/predictions/Reshape_1" + input: "cls/predictions/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + } + } + } + } +} +node { + name: "cls/predictions/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "cls/predictions/Sum_1" + op: "Sum" + input: "cls/predictions/mul_1" + input: "cls/predictions/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "cls/predictions/Const_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "cls/predictions/Sum_2" + op: "Sum" + input: "cls/predictions/Reshape_1" + input: "cls/predictions/Const_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "cls/predictions/add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999747378752e-06 + } + } + } +} +node { + name: "cls/predictions/add" + op: "AddV2" + input: "cls/predictions/Sum_2" + input: "cls/predictions/add/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "cls/predictions/truediv" + op: "RealDiv" + input: "cls/predictions/Sum_1" + input: "cls/predictions/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/Initializer/truncated_normal/shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\002\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/Initializer/truncated_normal/mean" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/Initializer/truncated_normal/stddev" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.019999999552965164 + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/Initializer/truncated_normal/TruncatedNormal" + op: "TruncatedNormal" + input: "cls/seq_relationship/output_weights/Initializer/truncated_normal/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "seed" + value { + i: 0 + } + } + attr { + key: "seed2" + value { + i: 0 + } + } +} +node { + name: "cls/seq_relationship/output_weights/Initializer/truncated_normal/mul" + op: "Mul" + input: "cls/seq_relationship/output_weights/Initializer/truncated_normal/TruncatedNormal" + input: "cls/seq_relationship/output_weights/Initializer/truncated_normal/stddev" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/Initializer/truncated_normal" + op: "Add" + input: "cls/seq_relationship/output_weights/Initializer/truncated_normal/mul" + input: "cls/seq_relationship/output_weights/Initializer/truncated_normal/mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/output_weights" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/seq_relationship/output_weights/Assign" + op: "Assign" + input: "cls/seq_relationship/output_weights" + input: "cls/seq_relationship/output_weights/Initializer/truncated_normal" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/output_weights/read" + op: "Identity" + input: "cls/seq_relationship/output_weights" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/output_bias/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 2 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/seq_relationship/output_bias" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/seq_relationship/output_bias/Assign" + op: "Assign" + input: "cls/seq_relationship/output_bias" + input: "cls/seq_relationship/output_bias/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/output_bias/read" + op: "Identity" + input: "cls/seq_relationship/output_bias" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/MatMul" + op: "MatMul" + input: "bert/pooler/dense/Tanh" + input: "cls/seq_relationship/output_weights/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 2 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/BiasAdd" + op: "BiasAdd" + input: "cls/seq_relationship/MatMul" + input: "cls/seq_relationship/output_bias/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 2 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "cls/seq_relationship/LogSoftmax" + op: "LogSoftmax" + input: "cls/seq_relationship/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "cls/seq_relationship/Reshape" + op: "Reshape" + input: "IteratorGetNext:5" + input: "cls/seq_relationship/Reshape/shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/one_hot/on_value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "cls/seq_relationship/one_hot/off_value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/seq_relationship/one_hot/depth" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "cls/seq_relationship/one_hot" + op: "OneHot" + input: "cls/seq_relationship/Reshape" + input: "cls/seq_relationship/one_hot/depth" + input: "cls/seq_relationship/one_hot/on_value" + input: "cls/seq_relationship/one_hot/off_value" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "TI" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 2 + } + } + } + } + } + attr { + key: "axis" + value { + i: -1 + } + } +} +node { + name: "cls/seq_relationship/mul" + op: "Mul" + input: "cls/seq_relationship/one_hot" + input: "cls/seq_relationship/LogSoftmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "cls/seq_relationship/Sum" + op: "Sum" + input: "cls/seq_relationship/mul" + input: "cls/seq_relationship/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "cls/seq_relationship/Neg" + op: "Neg" + input: "cls/seq_relationship/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "cls/seq_relationship/Mean" + op: "Mean" + input: "cls/seq_relationship/Neg" + input: "cls/seq_relationship/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "add_1" + op: "AddV2" + input: "cls/predictions/truediv" + input: "cls/seq_relationship/Mean" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Const_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0017600000137463212 + } + } + } +} +node { + name: "PolynomialDecay/Cast/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "PolynomialDecay/Cast_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "PolynomialDecay/Cast_2/ReadVariableOp" + op: "ReadVariableOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "PolynomialDecay/Cast_2" + op: "Cast" + input: "PolynomialDecay/Cast_2/ReadVariableOp" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/Cast_3/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 100000 + } + } + } +} +node { + name: "PolynomialDecay/Cast_3" + op: "Cast" + input: "PolynomialDecay/Cast_3/x" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/Minimum/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 100000.0 + } + } + } +} +node { + name: "PolynomialDecay/Minimum" + op: "Minimum" + input: "PolynomialDecay/Cast_2" + input: "PolynomialDecay/Minimum/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/truediv" + op: "RealDiv" + input: "PolynomialDecay/Minimum" + input: "PolynomialDecay/Cast_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/sub" + op: "Sub" + input: "Const_1" + input: "PolynomialDecay/Cast/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/sub_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "PolynomialDecay/sub_1" + op: "Sub" + input: "PolynomialDecay/sub_1/x" + input: "PolynomialDecay/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/Pow" + op: "Pow" + input: "PolynomialDecay/sub_1" + input: "PolynomialDecay/Cast_1/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay/Mul" + op: "Mul" + input: "PolynomialDecay/sub" + input: "PolynomialDecay/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "PolynomialDecay" + op: "Add" + input: "PolynomialDecay/Mul" + input: "PolynomialDecay/Cast/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Cast/ReadVariableOp" + op: "ReadVariableOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "Cast" + op: "Cast" + input: "Cast/ReadVariableOp" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Const_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "sub" + op: "Sub" + input: "Cast" + input: "Const_2" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Const_3" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 100 + } + } + } +} +node { + name: "Cast_1" + op: "Cast" + input: "sub" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Cast_2" + op: "Cast" + input: "Const_3" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "truediv" + op: "RealDiv" + input: "Cast_1" + input: "Cast_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0017600000137463212 + } + } + } +} +node { + name: "mul_1" + op: "Mul" + input: "mul_1/x" + input: "truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Less" + op: "Less" + input: "sub" + input: "Const_3" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Cast_3" + op: "Cast" + input: "Less" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_BOOL + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "sub_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "sub_1" + op: "Sub" + input: "sub_1/x" + input: "Cast_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_2" + op: "Mul" + input: "sub_1" + input: "PolynomialDecay" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_3" + op: "Mul" + input: "Cast_3" + input: "mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "add_2" + op: "AddV2" + input: "mul_2" + input: "mul_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/grad_ys_0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/Fill" + op: "Fill" + input: "gradients/Shape" + input: "gradients/grad_ys_0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/cls/predictions/truediv_grad/Shape" + input: "gradients/cls/predictions/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/RealDiv" + op: "RealDiv" + input: "gradients/Fill" + input: "cls/predictions/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/truediv_grad/RealDiv" + input: "gradients/cls/predictions/truediv_grad/BroadcastGradientArgs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/truediv_grad/Sum" + input: "gradients/cls/predictions/truediv_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/Neg" + op: "Neg" + input: "cls/predictions/Sum_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/RealDiv_1" + op: "RealDiv" + input: "gradients/cls/predictions/truediv_grad/Neg" + input: "cls/predictions/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/RealDiv_2" + op: "RealDiv" + input: "gradients/cls/predictions/truediv_grad/RealDiv_1" + input: "cls/predictions/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/mul" + op: "Mul" + input: "gradients/Fill" + input: "gradients/cls/predictions/truediv_grad/RealDiv_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/Sum_1" + op: "Sum" + input: "gradients/cls/predictions/truediv_grad/mul" + input: "gradients/cls/predictions/truediv_grad/BroadcastGradientArgs:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/truediv_grad/Reshape_1" + op: "Reshape" + input: "gradients/cls/predictions/truediv_grad/Sum_1" + input: "gradients/cls/predictions/truediv_grad/Shape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Mean_grad/Reshape" + op: "Reshape" + input: "gradients/Fill" + input: "gradients/cls/seq_relationship/Mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 16 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Mean_grad/Tile" + op: "Tile" + input: "gradients/cls/seq_relationship/Mean_grad/Reshape" + input: "gradients/cls/seq_relationship/Mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Mean_grad/Const_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 16.0 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Mean_grad/truediv" + op: "RealDiv" + input: "gradients/cls/seq_relationship/Mean_grad/Tile" + input: "gradients/cls/seq_relationship/Mean_grad/Const_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_1_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/truediv_grad/Reshape" + input: "gradients/cls/predictions/Sum_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_1_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 320 + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_1_grad/Tile" + op: "Tile" + input: "gradients/cls/predictions/Sum_1_grad/Reshape" + input: "gradients/cls/predictions/Sum_1_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Neg_grad/Neg" + op: "Neg" + input: "gradients/cls/seq_relationship/Mean_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/mul_1_grad/Mul" + op: "Mul" + input: "gradients/cls/predictions/Sum_1_grad/Tile" + input: "cls/predictions/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/cls/predictions/Sum_1_grad/Tile" + input: "cls/predictions/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Cast/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\020\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Cast_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Size" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/add" + op: "AddV2" + input: "gradients/cls/seq_relationship/Sum_grad/Cast_1/x" + input: "gradients/cls/seq_relationship/Sum_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/mod" + op: "FloorMod" + input: "gradients/cls/seq_relationship/Sum_grad/add" + input: "gradients/cls/seq_relationship/Sum_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/range/start" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/range/delta" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/range" + op: "Range" + input: "gradients/cls/seq_relationship/Sum_grad/range/start" + input: "gradients/cls/seq_relationship/Sum_grad/Size" + input: "gradients/cls/seq_relationship/Sum_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Fill/value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Fill" + op: "Fill" + input: "gradients/cls/seq_relationship/Sum_grad/Shape" + input: "gradients/cls/seq_relationship/Sum_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/cls/seq_relationship/Sum_grad/range" + input: "gradients/cls/seq_relationship/Sum_grad/mod" + input: "gradients/cls/seq_relationship/Sum_grad/Cast/x" + input: "gradients/cls/seq_relationship/Sum_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Maximum/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\020\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Maximum/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Maximum" + op: "Maximum" + input: "gradients/cls/seq_relationship/Sum_grad/Maximum/x" + input: "gradients/cls/seq_relationship/Sum_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/floordiv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\020\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/floordiv" + op: "FloorDiv" + input: "gradients/cls/seq_relationship/Sum_grad/floordiv/x" + input: "gradients/cls/seq_relationship/Sum_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\020\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Reshape" + op: "Reshape" + input: "gradients/cls/seq_relationship/Neg_grad/Neg" + input: "gradients/cls/seq_relationship/Sum_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/cls/seq_relationship/Sum_grad/Tile" + op: "Tile" + input: "gradients/cls/seq_relationship/Sum_grad/Reshape" + input: "gradients/cls/seq_relationship/Sum_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Neg_grad/Neg" + op: "Neg" + input: "gradients/cls/predictions/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/mul_grad/Mul" + op: "Mul" + input: "gradients/cls/seq_relationship/Sum_grad/Tile" + input: "cls/seq_relationship/LogSoftmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/mul_grad/Mul_1" + op: "Mul" + input: "gradients/cls/seq_relationship/Sum_grad/Tile" + input: "cls/seq_relationship/one_hot" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Cast/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\326\234\001\000" + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Cast_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Size" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/add" + op: "AddV2" + input: "gradients/cls/predictions/Sum_grad/Cast_1/x" + input: "gradients/cls/predictions/Sum_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/mod" + op: "FloorMod" + input: "gradients/cls/predictions/Sum_grad/add" + input: "gradients/cls/predictions/Sum_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/range/start" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/range/delta" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/range" + op: "Range" + input: "gradients/cls/predictions/Sum_grad/range/start" + input: "gradients/cls/predictions/Sum_grad/Size" + input: "gradients/cls/predictions/Sum_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Fill/value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Fill" + op: "Fill" + input: "gradients/cls/predictions/Sum_grad/Shape" + input: "gradients/cls/predictions/Sum_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/cls/predictions/Sum_grad/range" + input: "gradients/cls/predictions/Sum_grad/mod" + input: "gradients/cls/predictions/Sum_grad/Cast/x" + input: "gradients/cls/predictions/Sum_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Maximum/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Maximum/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Maximum" + op: "Maximum" + input: "gradients/cls/predictions/Sum_grad/Maximum/x" + input: "gradients/cls/predictions/Sum_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/floordiv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\326\234\001\000" + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/floordiv" + op: "FloorDiv" + input: "gradients/cls/predictions/Sum_grad/floordiv/x" + input: "gradients/cls/predictions/Sum_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/Neg_grad/Neg" + input: "gradients/cls/predictions/Sum_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\001\000\000\000\326\234\001\000" + } + } + } +} +node { + name: "gradients/cls/predictions/Sum_grad/Tile" + op: "Tile" + input: "gradients/cls/predictions/Sum_grad/Reshape" + input: "gradients/cls/predictions/Sum_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/LogSoftmax_grad/Exp" + op: "Exp" + input: "cls/seq_relationship/LogSoftmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/LogSoftmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/cls/seq_relationship/LogSoftmax_grad/Sum" + op: "Sum" + input: "gradients/cls/seq_relationship/mul_grad/Mul_1" + input: "gradients/cls/seq_relationship/LogSoftmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/cls/seq_relationship/LogSoftmax_grad/mul" + op: "Mul" + input: "gradients/cls/seq_relationship/LogSoftmax_grad/Sum" + input: "gradients/cls/seq_relationship/LogSoftmax_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/LogSoftmax_grad/sub" + op: "Sub" + input: "gradients/cls/seq_relationship/mul_grad/Mul_1" + input: "gradients/cls/seq_relationship/LogSoftmax_grad/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/mul_grad/Mul" + op: "Mul" + input: "gradients/cls/predictions/Sum_grad/Tile" + input: "cls/predictions/one_hot" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/mul_grad/Mul_1" + op: "Mul" + input: "gradients/cls/predictions/Sum_grad/Tile" + input: "cls/predictions/LogSoftmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/cls/seq_relationship/LogSoftmax_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/cls/predictions/LogSoftmax_grad/Exp" + op: "Exp" + input: "cls/predictions/LogSoftmax" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/LogSoftmax_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/cls/predictions/LogSoftmax_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/mul_grad/Mul" + input: "gradients/cls/predictions/LogSoftmax_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/cls/predictions/LogSoftmax_grad/mul" + op: "Mul" + input: "gradients/cls/predictions/LogSoftmax_grad/Sum" + input: "gradients/cls/predictions/LogSoftmax_grad/Exp" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/LogSoftmax_grad/sub" + op: "Sub" + input: "gradients/cls/predictions/mul_grad/Mul" + input: "gradients/cls/predictions/LogSoftmax_grad/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "gradients/cls/seq_relationship/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/cls/seq_relationship/LogSoftmax_grad/sub" + input: "cls/seq_relationship/output_weights/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/cls/seq_relationship/MatMul_grad/MatMul_1" + op: "MatMul" + input: "gradients/cls/seq_relationship/LogSoftmax_grad/sub" + input: "bert/pooler/dense/Tanh" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/cls/predictions/LogSoftmax_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/pooler/dense/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/pooler/dense/Tanh" + input: "gradients/cls/seq_relationship/MatMul_grad/MatMul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/cls/predictions/LogSoftmax_grad/sub" + input: "bert/embeddings/word_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/MatMul_grad/MatMul_1" + op: "MatMul" + input: "gradients/cls/predictions/LogSoftmax_grad/sub" + input: "cls/predictions/transform/LayerNorm/batchnorm/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/bert/pooler/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/bert/pooler/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/pooler/dense/Tanh_grad/TanhGrad" + input: "bert/pooler/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/pooler/Squeeze" + input: "gradients/bert/pooler/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/cls/predictions/MatMul_grad/MatMul" + input: "cls/predictions/transform/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/cls/predictions/MatMul_grad/MatMul" + input: "cls/predictions/transform/dense/mul_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 128 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs/s0" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/MatMul_grad/MatMul" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 128 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/cls/predictions/MatMul_grad/MatMul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/pooler/Squeeze_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/pooler/Squeeze_grad/Reshape" + op: "Reshape" + input: "gradients/bert/pooler/dense/MatMul_grad/MatMul" + input: "gradients/bert/pooler/Squeeze_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs/s0" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Neg" + input: "cls/predictions/transform/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "cls/predictions/transform/LayerNorm/moments/mean" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/pooler/strided_slice_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/pooler/strided_slice_grad/StridedSliceGrad" + op: "StridedSliceGrad" + input: "gradients/bert/pooler/strided_slice_grad/Shape" + input: "bert/pooler/strided_slice/stack" + input: "bert/pooler/strided_slice/stack_1" + input: "bert/pooler/strided_slice/stack_2" + input: "gradients/bert/pooler/Squeeze_grad/Reshape" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 5 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 5 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/AddN" + op: "AddN" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 128 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs/s0" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN" + input: "cls/predictions/transform/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "cls/predictions/transform/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 128 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "cls/predictions/transform/LayerNorm/batchnorm/Rsqrt" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs/s0" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Cast/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Cast_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/add" + op: "AddV2" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Cast_1/x" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/add" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/range/start" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Size" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Shape" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/range" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/mod" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Cast/x" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Maximum/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Maximum/x" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/floordiv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/floordiv/x" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\001\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 128.0 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Tile" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "cls/predictions/transform/dense/mul_3" + input: "cls/predictions/transform/LayerNorm/moments/StopGradient" + input: "^gradients/cls/predictions/transform/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs/s0" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\001\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 128.0 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Tile" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/AddN_1" + op: "AddN" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/cls/predictions/transform/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/cls/predictions/transform/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_3_grad/Mul" + op: "Mul" + input: "gradients/AddN_1" + input: "cls/predictions/transform/dense/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_1" + input: "cls/predictions/transform/dense/BiasAdd" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_2_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_2_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/cls/predictions/transform/dense/mul_2_grad/BroadcastGradientArgs/s0" + input: "gradients/cls/predictions/transform/dense/mul_2_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_2_grad/Mul" + op: "Mul" + input: "gradients/cls/predictions/transform/dense/mul_3_grad/Mul_1" + input: "cls/predictions/transform/dense/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_2_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/transform/dense/mul_2_grad/Mul" + input: "gradients/cls/predictions/transform/dense/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/dense/mul_2_grad/Sum" + input: "gradients/cls/predictions/transform/dense/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_2_grad/Mul_1" + op: "Mul" + input: "cls/predictions/transform/dense/mul_2/x" + input: "gradients/cls/predictions/transform/dense/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/add_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/add_1_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/transform/dense/mul_2_grad/Mul_1" + input: "gradients/cls/predictions/transform/dense/add_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/add_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/dense/add_1_grad/Sum" + input: "gradients/cls/predictions/transform/dense/add_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "cls/predictions/transform/dense/Tanh" + input: "gradients/cls/predictions/transform/dense/mul_2_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_1_grad/Mul" + op: "Mul" + input: "gradients/cls/predictions/transform/dense/Tanh_grad/TanhGrad" + input: "cls/predictions/transform/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_1_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/transform/dense/mul_1_grad/Mul" + input: "gradients/cls/predictions/transform/dense/mul_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/dense/mul_1_grad/Sum" + input: "gradients/cls/predictions/transform/dense/mul_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_1_grad/Mul_1" + op: "Mul" + input: "cls/predictions/transform/dense/mul_1/x" + input: "gradients/cls/predictions/transform/dense/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_grad/Mul" + op: "Mul" + input: "gradients/cls/predictions/transform/dense/mul_1_grad/Mul_1" + input: "cls/predictions/transform/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/transform/dense/mul_grad/Mul" + input: "gradients/cls/predictions/transform/dense/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/dense/mul_grad/Sum" + input: "gradients/cls/predictions/transform/dense/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/mul_grad/Mul_1" + op: "Mul" + input: "cls/predictions/transform/dense/mul/x" + input: "gradients/cls/predictions/transform/dense/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/cls/predictions/transform/dense/Pow_grad/BroadcastGradientArgs/s0" + input: "gradients/cls/predictions/transform/dense/Pow_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/mul" + op: "Mul" + input: "gradients/cls/predictions/transform/dense/mul_grad/Mul_1" + input: "cls/predictions/transform/dense/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/sub" + op: "Sub" + input: "cls/predictions/transform/dense/Pow/y" + input: "gradients/cls/predictions/transform/dense/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/Pow" + op: "Pow" + input: "cls/predictions/transform/dense/BiasAdd" + input: "gradients/cls/predictions/transform/dense/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/mul_1" + op: "Mul" + input: "gradients/cls/predictions/transform/dense/Pow_grad/mul" + input: "gradients/cls/predictions/transform/dense/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/Greater" + op: "Greater" + input: "cls/predictions/transform/dense/BiasAdd" + input: "gradients/cls/predictions/transform/dense/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/ones_like" + op: "Fill" + input: "gradients/cls/predictions/transform/dense/Pow_grad/ones_like/Shape" + input: "gradients/cls/predictions/transform/dense/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/Select" + op: "Select" + input: "gradients/cls/predictions/transform/dense/Pow_grad/Greater" + input: "cls/predictions/transform/dense/BiasAdd" + input: "gradients/cls/predictions/transform/dense/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/Log" + op: "Log" + input: "gradients/cls/predictions/transform/dense/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\001\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/cls/predictions/transform/dense/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/cls/predictions/transform/dense/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/Select_1" + op: "Select" + input: "gradients/cls/predictions/transform/dense/Pow_grad/Greater" + input: "gradients/cls/predictions/transform/dense/Pow_grad/Log" + input: "gradients/cls/predictions/transform/dense/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/mul_2" + op: "Mul" + input: "gradients/cls/predictions/transform/dense/mul_grad/Mul_1" + input: "cls/predictions/transform/dense/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/mul_3" + op: "Mul" + input: "gradients/cls/predictions/transform/dense/Pow_grad/mul_2" + input: "gradients/cls/predictions/transform/dense/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/Sum" + op: "Sum" + input: "gradients/cls/predictions/transform/dense/Pow_grad/mul_3" + input: "gradients/cls/predictions/transform/dense/Pow_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/dense/Pow_grad/Sum" + input: "gradients/cls/predictions/transform/dense/Pow_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_2" + op: "AddN" + input: "gradients/cls/predictions/transform/dense/mul_3_grad/Mul" + input: "gradients/cls/predictions/transform/dense/mul_1_grad/Mul_1" + input: "gradients/cls/predictions/transform/dense/Pow_grad/mul_1" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/dense/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/BiasAdd_grad/BiasAddGrad" + op: "BiasAddGrad" + input: "gradients/AddN_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "data_format" + value { + s: "NHWC" + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/AddN_2" + input: "cls/predictions/transform/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/cls/predictions/transform/dense/MatMul_grad/MatMul_1" + op: "MatMul" + input: "GatherV2" + input: "gradients/AddN_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/GatherV2_grad/Shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@Reshape_2" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\000\000\000\000\003\000\000\000\000\000\000" + } + } + } +} +node { + name: "gradients/GatherV2_grad/Cast" + op: "Cast" + input: "gradients/GatherV2_grad/Shape" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_class" + value { + list { + s: "loc:@Reshape_2" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/GatherV2_grad/Size" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 320 + } + } + } +} +node { + name: "gradients/GatherV2_grad/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/GatherV2_grad/ExpandDims" + op: "ExpandDims" + input: "gradients/GatherV2_grad/Size" + input: "gradients/GatherV2_grad/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/GatherV2_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/GatherV2_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/GatherV2_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/GatherV2_grad/strided_slice" + op: "StridedSlice" + input: "gradients/GatherV2_grad/Cast" + input: "gradients/GatherV2_grad/strided_slice/stack" + input: "gradients/GatherV2_grad/strided_slice/stack_1" + input: "gradients/GatherV2_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 1 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/GatherV2_grad/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/GatherV2_grad/concat" + op: "ConcatV2" + input: "gradients/GatherV2_grad/ExpandDims" + input: "gradients/GatherV2_grad/strided_slice" + input: "gradients/GatherV2_grad/concat/axis" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/GatherV2_grad/Reshape" + op: "Reshape" + input: "gradients/cls/predictions/transform/dense/MatMul_grad/MatMul" + input: "gradients/GatherV2_grad/concat" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/GatherV2_grad/Reshape_1" + op: "Reshape" + input: "Reshape_1" + input: "gradients/GatherV2_grad/ExpandDims" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 320 + } + } + } + } + } +} +node { + name: "gradients/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/Reshape_2_grad/Reshape/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/Reshape_2_grad/Reshape/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/Reshape_2_grad/Reshape/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/Reshape_2_grad/Reshape/strided_slice" + op: "StridedSlice" + input: "gradients/GatherV2_grad/Cast" + input: "gradients/Reshape_2_grad/Reshape/strided_slice/stack" + input: "gradients/Reshape_2_grad/Reshape/strided_slice/stack_1" + input: "gradients/Reshape_2_grad/Reshape/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } +} +node { + name: "gradients/Reshape_2_grad/Reshape/tensor" + op: "UnsortedSegmentSum" + input: "gradients/GatherV2_grad/Reshape" + input: "gradients/GatherV2_grad/Reshape_1" + input: "gradients/Reshape_2_grad/Reshape/strided_slice" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tindices" + value { + type: DT_INT32 + } + } + attr { + key: "Tnumsegments" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/Reshape_2_grad/Reshape/tensor" + input: "gradients/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_3" + op: "AddN" + input: "gradients/bert/pooler/strided_slice_grad/StridedSliceGrad" + input: "gradients/Reshape_2_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/strided_slice_grad/StridedSliceGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_3" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_3" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/BroadcastGradientArgs/s0" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_3" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/BroadcastGradientArgs/s0" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_4" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/BroadcastGradientArgs/s0" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_4" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/AddN_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_grad/BroadcastGradientArgs/s0" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Cast/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Cast_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Size" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 3 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/add" + op: "AddV2" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Cast_1/x" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/add" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/range/start" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Size" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Shape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/range" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/mod" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Cast/x" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Maximum/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Maximum/x" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/floordiv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/floordiv/x" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/BroadcastGradientArgs/s0" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_5" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add_grad/BroadcastGradientArgs/s0" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_5" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_5" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2_grad/BroadcastGradientArgs/s0" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Tanh" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul/x" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/BroadcastGradientArgs/s0" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow/y" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_6" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add_grad/BroadcastGradientArgs/s0" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_6" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_6" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_7" + op: "AddN" + input: "gradients/AddN_5" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_7" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_7" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_7" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_8" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_8" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/add" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_9" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_9" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_9" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "@\000\000\000\014\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_3_grad/transpose" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/attention_probs" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/attention_probs_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/attention_probs_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/attention_probs_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/attention_probs_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/attention_probs_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/attention_probs_grad/sub" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/add_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\014\000\000\000@\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/add_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/add_grad/BroadcastGradientArgs/s0" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/add_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/add_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/add_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/add_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/add_grad/BroadcastGradientArgs/s0" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/add_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\014\000\000\000@\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul_grad/BroadcastGradientArgs/s0" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/MatMul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/AddN_10" + op: "AddN" + input: "gradients/AddN_9" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_10" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_10" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_10" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_11" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_11" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/AddN_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_12" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_12" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_12" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Tanh" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul/x" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow/y" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_13" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_13" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_13" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_14" + op: "AddN" + input: "gradients/AddN_12" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_14" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_14" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_14" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_15" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_15" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/add" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_16" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_16" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_16" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "@\000\000\000\014\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_3_grad/transpose" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/attention_probs" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/attention_probs_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/attention_probs_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/attention_probs_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/attention_probs_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/attention_probs_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/attention_probs_grad/sub" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/MatMul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/AddN_17" + op: "AddN" + input: "gradients/AddN_16" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_17" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_17" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_17" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_18" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_18" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/AddN_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_19" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_19" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_19" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Tanh" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul/x" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow/y" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_20" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_20" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_20" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_21" + op: "AddN" + input: "gradients/AddN_19" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_21" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_21" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_21" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_22" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_22" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/add" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_23" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_23" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_23" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "@\000\000\000\014\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_3_grad/transpose" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/attention_probs" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/attention_probs_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/attention_probs_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/attention_probs_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/attention_probs_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/attention_probs_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/attention_probs_grad/sub" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/MatMul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/AddN_24" + op: "AddN" + input: "gradients/AddN_23" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_24" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_24" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_24" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_25" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_25" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/AddN_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_26" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_26" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_26" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Tanh" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul/x" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow/y" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_27" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_27" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_27" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_28" + op: "AddN" + input: "gradients/AddN_26" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_28" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_28" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_28" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_29" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_29" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/add" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_30" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_30" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_30" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "@\000\000\000\014\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_3_grad/transpose" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/attention_probs" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/attention_probs_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/attention_probs_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/attention_probs_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/attention_probs_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/attention_probs_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/attention_probs_grad/sub" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/MatMul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/AddN_31" + op: "AddN" + input: "gradients/AddN_30" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_31" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_31" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_31" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_32" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_32" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/AddN_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_33" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_33" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_33" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Tanh" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul/x" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow/y" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_34" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_34" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_34" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_35" + op: "AddN" + input: "gradients/AddN_33" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_35" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_35" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_35" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_36" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_36" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/add" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_37" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_37" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_37" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "@\000\000\000\014\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_3_grad/transpose" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/attention_probs" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/attention_probs_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/attention_probs_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/attention_probs_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/attention_probs_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/attention_probs_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/attention_probs_grad/sub" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/MatMul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/AddN_38" + op: "AddN" + input: "gradients/AddN_37" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_38" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_38" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_38" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_39" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_39" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/AddN_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_40" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_40" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_40" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Tanh" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul/x" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow/y" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_41" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_41" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_41" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_42" + op: "AddN" + input: "gradients/AddN_40" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_42" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_42" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_42" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_43" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_43" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/add" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_44" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_44" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_44" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "@\000\000\000\014\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_3_grad/transpose" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/attention_probs" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/attention_probs_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/attention_probs_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/attention_probs_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/attention_probs_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/attention_probs_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/attention_probs_grad/sub" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/MatMul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/AddN_45" + op: "AddN" + input: "gradients/AddN_44" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_45" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_45" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_45" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_46" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_46" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/AddN_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_47" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_47" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_47" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Tanh" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul/x" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow/y" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_48" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_48" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_48" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_49" + op: "AddN" + input: "gradients/AddN_47" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_49" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_49" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_49" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_50" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_50" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/add" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_51" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_51" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_51" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "@\000\000\000\014\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_3_grad/transpose" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/attention_probs" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/attention_probs_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/attention_probs_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/attention_probs_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/attention_probs_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/attention_probs_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/attention_probs_grad/sub" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/MatMul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/AddN_52" + op: "AddN" + input: "gradients/AddN_51" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_52" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_52" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_52" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_53" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_53" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/AddN_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_54" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_54" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_54" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Tanh" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul/x" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow/y" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_55" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_55" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_55" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_56" + op: "AddN" + input: "gradients/AddN_54" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_56" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_56" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_56" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_57" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_57" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/add" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_58" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_58" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_58" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "@\000\000\000\014\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_3_grad/transpose" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/attention_probs" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/attention_probs_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/attention_probs_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/attention_probs_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/attention_probs_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/attention_probs_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/attention_probs_grad/sub" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/MatMul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/AddN_59" + op: "AddN" + input: "gradients/AddN_58" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_59" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_59" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_59" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_60" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_60" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/AddN_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_61" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_61" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_61" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Tanh" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul/x" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow/y" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_62" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_62" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_62" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_63" + op: "AddN" + input: "gradients/AddN_61" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_63" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_63" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_63" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_64" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_64" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/add" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_65" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_65" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_65" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "@\000\000\000\014\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_3_grad/transpose" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/attention_probs" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/attention_probs_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/attention_probs_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/attention_probs_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/attention_probs_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/attention_probs_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/attention_probs_grad/sub" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/MatMul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/AddN_66" + op: "AddN" + input: "gradients/AddN_65" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_66" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_66" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_66" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_67" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_67" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/AddN_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_68" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_68" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_68" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Tanh" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul/x" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow/y" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_69" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_69" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_69" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_70" + op: "AddN" + input: "gradients/AddN_68" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_70" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_70" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_70" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_71" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_71" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/add" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_72" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_72" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_72" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "@\000\000\000\014\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_3_grad/transpose" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/attention_probs" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/attention_probs_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/attention_probs_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/attention_probs_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/attention_probs_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/attention_probs_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/attention_probs_grad/sub" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/MatMul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/AddN_73" + op: "AddN" + input: "gradients/AddN_72" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_73" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_73" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_73" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_74" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_74" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/AddN_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_75" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_75" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_75" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Tanh" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul/x" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow/y" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_76" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_76" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_76" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_77" + op: "AddN" + input: "gradients/AddN_75" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_77" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_77" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_77" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_78" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_78" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/add" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_79" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_79" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_79" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "@\000\000\000\014\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_3_grad/transpose" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/attention_probs" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/attention_probs_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/attention_probs_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/attention_probs_grad/mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/attention_probs_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/attention_probs_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/attention_probs_grad/sub" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/MatMul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/AddN_80" + op: "AddN" + input: "gradients/AddN_79" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_80" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_80" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_80" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_81" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_82" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_82" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/AddN_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/AddN_83" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/add_1" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_84" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_84" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_84" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_85" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/AddN_86" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/output/dense/einsum/Reshape_grad/Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2/x" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/add_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/add_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/add_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + op: "TanhGrad" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Tanh" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_2_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1/x" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Tanh_grad/TanhGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul/x" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow/y" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/sub/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + op: "Pow" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + op: "Greater" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Shape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/add" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/ones_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + op: "Log" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + op: "Fill" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/shape_as_tensor" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + op: "Select" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Greater" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Log" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/zeros_like" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_grad/Mul_1" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/mul_2" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Select_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/mul_3" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/AddN_87" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/Pow_grad/mul_1" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/mul_3_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_87" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\014\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_87" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/AddN_88" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_89" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "gradients/AddN_90" + op: "AddN" + input: "gradients/AddN_84" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/ffn_1/intermediate/dense/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm_1/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/AddN_90" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/AddN_90" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/add" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/AddN_90" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/AddN_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_91" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_92" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_92" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/AddN_93" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/add" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/StopGradient" + input: "^gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 768.0 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_94" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_94" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_94" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_95" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/add_grad/Reshape" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/add_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "@\000\000\000\014\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_3/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_3_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_3_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_3_grad/transpose" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + op: "BatchMatMulV2" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/attention_probs" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_3_grad/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/AddN_96" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/attention_probs_grad/mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/attention_probs_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/attention_probs_grad/mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/attention_probs_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/attention_probs_grad/sub" + op: "Sub" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/MatMul_2_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/attention_probs_grad/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/attention_probs_grad/sub" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/attention_probs" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_2/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_2_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/MatMul_2_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_2_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000\001\000\000\000@\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Mul_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Mul/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Mul_grad/Mul_1" + op: "Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/MatMul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/attention_probs_grad/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000\003\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Mul_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Mul_grad/Mul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Mul_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_2_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/MatMul_grad/MatMul" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: false + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + op: "BatchMatMulV2" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/Mul_grad/Mul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 12 + } + dim { + size: 64 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "adj_x" + value { + b: true + } + } + attr { + key: "adj_y" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_97" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/add_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\014\000\000\000@\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/add_grad/Sum" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose_1/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/add_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_98" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose_1_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_99" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_2_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_100" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\003\000\000@\000\000\000\014\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/MatMul_grad/MatMul_1" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 64 + } + dim { + size: 12 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/AddN_101" + op: "AddN" + input: "gradients/AddN_94" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/value/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 4 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/embedding_hidden_mapping_in/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/embedding_hidden_mapping_in/add_grad/Sum" + op: "Sum" + input: "gradients/AddN_101" + input: "gradients/bert/encoder/embedding_hidden_mapping_in/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/encoder/embedding_hidden_mapping_in/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 768 + } + } + } +} +node { + name: "gradients/bert/encoder/embedding_hidden_mapping_in/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/embedding_hidden_mapping_in/add_grad/Sum" + input: "gradients/bert/encoder/embedding_hidden_mapping_in/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + op: "InvertPermutation" + input: "bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose/perm" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + op: "Transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/Reshape_1_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose_grad/InvertPermutation" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tperm" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 12 + } + dim { + size: 64 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/embedding_hidden_mapping_in/einsum/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\004\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/embedding_hidden_mapping_in/einsum/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_101" + input: "gradients/bert/encoder/embedding_hidden_mapping_in/einsum/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/einsum/transpose_grad/transpose" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/AddN_102" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/embedding_hidden_mapping_in/einsum/MatMul_grad/MatMul" + op: "MatMul" + input: "gradients/bert/encoder/embedding_hidden_mapping_in/einsum/Reshape_1_grad/Reshape" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: false + } + } + attr { + key: "transpose_b" + value { + b: true + } + } +} +node { + name: "gradients/bert/encoder/embedding_hidden_mapping_in/einsum/MatMul_grad/MatMul_1" + op: "MatMul" + input: "bert/encoder/embedding_hidden_mapping_in/einsum/Reshape" + input: "gradients/bert/encoder/embedding_hidden_mapping_in/einsum/Reshape_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "transpose_a" + value { + b: true + } + } + attr { + key: "transpose_b" + value { + b: false + } + } +} +node { + name: "gradients/AddN_103" + op: "AddN" + input: "gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_10/layer_10/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_9/layer_9/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_8/layer_8/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_7/layer_7/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_6/layer_6/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_5/layer_5/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_4/layer_4/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_3/layer_3/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_2/layer_2/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0_1/layer_1/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + input: "gradients/bert/encoder/transformer/group_0/layer_0/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "gradients/bert/encoder/embedding_hidden_mapping_in/einsum/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/encoder/embedding_hidden_mapping_in/einsum/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/bert/encoder/embedding_hidden_mapping_in/einsum/MatMul_grad/MatMul" + input: "gradients/bert/encoder/embedding_hidden_mapping_in/einsum/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul" + op: "Mul" + input: "gradients/bert/encoder/embedding_hidden_mapping_in/einsum/Reshape_grad/Reshape" + input: "bert/embeddings/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul_1" + op: "Mul" + input: "gradients/bert/encoder/embedding_hidden_mapping_in/einsum/Reshape_grad/Reshape" + input: "bert/embeddings/add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 128 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs/s0" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Sum" + op: "Sum" + input: "gradients/bert/encoder/embedding_hidden_mapping_in/einsum/Reshape_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 128 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Neg" + op: "Neg" + input: "gradients/bert/encoder/embedding_hidden_mapping_in/einsum/Reshape_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs/s0" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Mul" + op: "Mul" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Neg" + input: "bert/embeddings/LayerNorm/batchnorm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Sum" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Mul" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Mul_1" + op: "Mul" + input: "bert/embeddings/LayerNorm/moments/mean" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Neg" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/AddN_104" + op: "AddN" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul_1" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Mul_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 128 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs/s0" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Mul" + op: "Mul" + input: "gradients/AddN_104" + input: "bert/embeddings/LayerNorm/gamma/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Mul" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Mul_1" + op: "Mul" + input: "bert/embeddings/LayerNorm/batchnorm/Rsqrt" + input: "gradients/AddN_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum_1" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Mul_1" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum_1/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 128 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Sum_1" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + op: "RsqrtGrad" + input: "bert/embeddings/LayerNorm/batchnorm/Rsqrt" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\000\000\000\000\001\000\000\000\002\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Sum" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Sum" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/add_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Cast/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Cast_1/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Size" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 3 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/add" + op: "AddV2" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Cast_1/x" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/mod" + op: "FloorMod" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/add" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Size" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range/start" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range/delta" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range" + op: "Range" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range/start" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Size" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range/delta" + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Fill/value" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Fill" + op: "Fill" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Shape" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Fill/value" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/DynamicStitch" + op: "DynamicStitch" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/range" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/mod" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Cast/x" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Fill" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum" + op: "Maximum" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum/x" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum/y" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/floordiv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/floordiv" + op: "FloorDiv" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/floordiv/x" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Maximum" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/Rsqrt_grad/RsqrtGrad" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Tile" + op: "Tile" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 128.0 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/truediv" + op: "RealDiv" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Tile" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/scalar" + op: "Const" + input: "^gradients/bert/embeddings/LayerNorm/moments/variance_grad/truediv" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Mul" + op: "Mul" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/scalar" + input: "gradients/bert/embeddings/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/sub" + op: "Sub" + input: "bert/embeddings/add_1" + input: "bert/embeddings/LayerNorm/moments/StopGradient" + input: "^gradients/bert/embeddings/LayerNorm/moments/variance_grad/truediv" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/mul_1" + op: "Mul" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Mul" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/sub" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs/s0" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Sum" + op: "Sum" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Sum" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Neg" + op: "Neg" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_2_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Tile/multiples" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000\001\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Tile" + op: "Tile" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Reshape" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Tile/multiples" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tmultiples" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 128.0 + } + } + } +} +node { + name: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/truediv" + op: "RealDiv" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Tile" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/AddN_105" + op: "AddN" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul" + input: "gradients/bert/embeddings/LayerNorm/moments/SquaredDifference_grad/mul_1" + input: "gradients/bert/embeddings/LayerNorm/moments/mean_grad/truediv" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_1_grad/Mul" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/BroadcastGradientArgs/s0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\020\000\000\000@\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/BroadcastGradientArgs/s1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000@\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/BroadcastGradientArgs" + op: "BroadcastGradientArgs" + input: "gradients/bert/embeddings/add_1_grad/BroadcastGradientArgs/s0" + input: "gradients/bert/embeddings/add_1_grad/BroadcastGradientArgs/s1" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/Sum/reduction_indices" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/Sum" + op: "Sum" + input: "gradients/AddN_105" + input: "gradients/bert/embeddings/add_1_grad/Sum/reduction_indices" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/Reshape/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 3 + } + } + tensor_content: "\001\000\000\000@\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/add_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/add_1_grad/Sum" + input: "gradients/bert/embeddings/add_1_grad/Reshape/shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Reshape_1_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/Reshape_1_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/add_1_grad/Reshape" + input: "gradients/bert/embeddings/Reshape_1_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 64 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Reshape_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 4 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 4 + } + } + tensor_content: "\020\000\000\000@\000\000\000\001\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/Reshape_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_105" + input: "gradients/bert/embeddings/Reshape_grad/Shape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 16 + } + dim { + size: 64 + } + dim { + size: 1 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Rank" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "@\000\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/stack/1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/stack" + op: "Pack" + input: "gradients/bert/embeddings/Slice_grad/Rank" + input: "gradients/bert/embeddings/Slice_grad/stack/1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Reshape" + op: "Reshape" + input: "bert/embeddings/Slice/begin" + input: "gradients/bert/embeddings/Slice_grad/stack" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\002\000\000\200\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/sub" + op: "Sub" + input: "gradients/bert/embeddings/Slice_grad/Shape_1" + input: "gradients/bert/embeddings/Slice_grad/Shape" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/sub_1" + op: "Sub" + input: "gradients/bert/embeddings/Slice_grad/sub" + input: "bert/embeddings/Slice/begin" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Reshape_1" + op: "Reshape" + input: "gradients/bert/embeddings/Slice_grad/sub_1" + input: "gradients/bert/embeddings/Slice_grad/stack" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/concat" + op: "ConcatV2" + input: "gradients/bert/embeddings/Slice_grad/Reshape" + input: "gradients/bert/embeddings/Slice_grad/Reshape_1" + input: "gradients/bert/embeddings/Slice_grad/concat/axis" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/Slice_grad/Pad" + op: "Pad" + input: "gradients/bert/embeddings/Reshape_1_grad/Reshape" + input: "gradients/bert/embeddings/Slice_grad/concat" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tpaddings" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_1_grad/Shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\002\000\000\000\000\000\000\000\200\000\000\000\000\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_1_grad/Cast" + op: "Cast" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/Shape" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_1_grad/Size" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1024 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_1_grad/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_1_grad/ExpandDims" + op: "ExpandDims" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/Size" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_1_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_1_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_1_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_1_grad/strided_slice" + op: "StridedSlice" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/Cast" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/strided_slice/stack" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/strided_slice/stack_1" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 1 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_1_grad/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_1_grad/concat" + op: "ConcatV2" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/ExpandDims" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/strided_slice" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/concat/axis" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_1_grad/Reshape" + op: "Reshape" + input: "gradients/AddN_105" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/concat" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_1_grad/Reshape_1" + op: "Reshape" + input: "IteratorGetNext:6" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/ExpandDims" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/Shape" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\326\234\001\000\000\000\000\000\200\000\000\000\000\000\000\000" + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/Cast" + op: "Cast" + input: "gradients/bert/embeddings/embedding_lookup_grad/Shape" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/Size" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1024 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/ExpandDims" + op: "ExpandDims" + input: "gradients/bert/embeddings/embedding_lookup_grad/Size" + input: "gradients/bert/embeddings/embedding_lookup_grad/ExpandDims/dim" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tdim" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/strided_slice" + op: "StridedSlice" + input: "gradients/bert/embeddings/embedding_lookup_grad/Cast" + input: "gradients/bert/embeddings/embedding_lookup_grad/strided_slice/stack" + input: "gradients/bert/embeddings/embedding_lookup_grad/strided_slice/stack_1" + input: "gradients/bert/embeddings/embedding_lookup_grad/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 1 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/concat" + op: "ConcatV2" + input: "gradients/bert/embeddings/embedding_lookup_grad/ExpandDims" + input: "gradients/bert/embeddings/embedding_lookup_grad/strided_slice" + input: "gradients/bert/embeddings/embedding_lookup_grad/concat/axis" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/Reshape" + op: "Reshape" + input: "gradients/bert/embeddings/Reshape_grad/Reshape" + input: "gradients/bert/embeddings/embedding_lookup_grad/concat" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/bert/embeddings/embedding_lookup_grad/Reshape_1" + op: "Reshape" + input: "bert/embeddings/ExpandDims" + input: "gradients/bert/embeddings/embedding_lookup_grad/ExpandDims" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + } + } + } + } +} +node { + name: "gradients/AddN_106/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "gradients/AddN_106/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/AddN_106/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "gradients/AddN_106/strided_slice" + op: "StridedSlice" + input: "gradients/bert/embeddings/embedding_lookup_grad/Cast" + input: "gradients/AddN_106/strided_slice/stack" + input: "gradients/AddN_106/strided_slice/stack_1" + input: "gradients/AddN_106/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } +} +node { + name: "gradients/AddN_106/inputs_1" + op: "UnsortedSegmentSum" + input: "gradients/bert/embeddings/embedding_lookup_grad/Reshape" + input: "gradients/bert/embeddings/embedding_lookup_grad/Reshape_1" + input: "gradients/AddN_106/strided_slice" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tindices" + value { + type: DT_INT32 + } + } + attr { + key: "Tnumsegments" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "gradients/AddN_106" + op: "AddN" + input: "gradients/cls/predictions/MatMul_grad/MatMul_1" + input: "gradients/AddN_106/inputs_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "global_norm/L2Loss" + op: "L2Loss" + input: "gradients/AddN_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/AddN_106" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_1" + op: "L2Loss" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/embedding_lookup_1_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_2" + op: "L2Loss" + input: "gradients/bert/embeddings/Slice_grad/Pad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/Slice_grad/Pad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_3" + op: "L2Loss" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_4" + op: "L2Loss" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_5" + op: "L2Loss" + input: "gradients/bert/encoder/embedding_hidden_mapping_in/einsum/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/embedding_hidden_mapping_in/einsum/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_6" + op: "L2Loss" + input: "gradients/bert/encoder/embedding_hidden_mapping_in/add_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/embedding_hidden_mapping_in/add_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_7" + op: "L2Loss" + input: "gradients/AddN_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_8" + op: "L2Loss" + input: "gradients/AddN_98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_9" + op: "L2Loss" + input: "gradients/AddN_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_10" + op: "L2Loss" + input: "gradients/AddN_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_11" + op: "L2Loss" + input: "gradients/AddN_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_12" + op: "L2Loss" + input: "gradients/AddN_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_13" + op: "L2Loss" + input: "gradients/AddN_96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_14" + op: "L2Loss" + input: "gradients/AddN_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/add_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_15" + op: "L2Loss" + input: "gradients/AddN_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_16" + op: "L2Loss" + input: "gradients/AddN_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_17" + op: "L2Loss" + input: "gradients/AddN_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_18" + op: "L2Loss" + input: "gradients/AddN_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_19" + op: "L2Loss" + input: "gradients/AddN_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_20" + op: "L2Loss" + input: "gradients/AddN_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_21" + op: "L2Loss" + input: "gradients/AddN_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_22" + op: "L2Loss" + input: "gradients/AddN_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_23" + op: "L2Loss" + input: "gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_24" + op: "L2Loss" + input: "gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_25" + op: "L2Loss" + input: "gradients/cls/predictions/transform/dense/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_26" + op: "L2Loss" + input: "gradients/cls/predictions/transform/dense/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_27" + op: "L2Loss" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Reshape" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_28" + op: "L2Loss" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_29" + op: "L2Loss" + input: "gradients/cls/predictions/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_30" + op: "L2Loss" + input: "gradients/cls/seq_relationship/MatMul_grad/MatMul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/L2Loss_31" + op: "L2Loss" + input: "gradients/cls/seq_relationship/BiasAdd_grad/BiasAddGrad" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/stack" + op: "Pack" + input: "global_norm/L2Loss" + input: "global_norm/L2Loss_1" + input: "global_norm/L2Loss_2" + input: "global_norm/L2Loss_3" + input: "global_norm/L2Loss_4" + input: "global_norm/L2Loss_5" + input: "global_norm/L2Loss_6" + input: "global_norm/L2Loss_7" + input: "global_norm/L2Loss_8" + input: "global_norm/L2Loss_9" + input: "global_norm/L2Loss_10" + input: "global_norm/L2Loss_11" + input: "global_norm/L2Loss_12" + input: "global_norm/L2Loss_13" + input: "global_norm/L2Loss_14" + input: "global_norm/L2Loss_15" + input: "global_norm/L2Loss_16" + input: "global_norm/L2Loss_17" + input: "global_norm/L2Loss_18" + input: "global_norm/L2Loss_19" + input: "global_norm/L2Loss_20" + input: "global_norm/L2Loss_21" + input: "global_norm/L2Loss_22" + input: "global_norm/L2Loss_23" + input: "global_norm/L2Loss_24" + input: "global_norm/L2Loss_25" + input: "global_norm/L2Loss_26" + input: "global_norm/L2Loss_27" + input: "global_norm/L2Loss_28" + input: "global_norm/L2Loss_29" + input: "global_norm/L2Loss_30" + input: "global_norm/L2Loss_31" + attr { + key: "N" + value { + i: 32 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 32 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "global_norm/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "global_norm/Sum" + op: "Sum" + input: "global_norm/stack" + input: "global_norm/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "global_norm/Const_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 2.0 + } + } + } +} +node { + name: "global_norm/mul" + op: "Mul" + input: "global_norm/Sum" + input: "global_norm/Const_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "global_norm/global_norm" + op: "Sqrt" + input: "global_norm/mul" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/truediv/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "clip_by_global_norm/truediv" + op: "RealDiv" + input: "clip_by_global_norm/truediv/x" + input: "global_norm/global_norm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "clip_by_global_norm/truediv_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "clip_by_global_norm/truediv_1" + op: "RealDiv" + input: "clip_by_global_norm/Const" + input: "clip_by_global_norm/truediv_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/Minimum" + op: "Minimum" + input: "clip_by_global_norm/truediv" + input: "clip_by_global_norm/truediv_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/mul/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "clip_by_global_norm/mul" + op: "Mul" + input: "clip_by_global_norm/mul/x" + input: "clip_by_global_norm/Minimum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/IsFinite" + op: "IsFinite" + input: "global_norm/global_norm" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/Const_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: nan + } + } + } +} +node { + name: "clip_by_global_norm/Select" + op: "Select" + input: "clip_by_global_norm/IsFinite" + input: "clip_by_global_norm/mul" + input: "clip_by_global_norm/Const_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_1" + op: "Mul" + input: "gradients/AddN_106" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/AddN_106" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_0" + op: "Identity" + input: "clip_by_global_norm/mul_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/AddN_106" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_2" + op: "Mul" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/embedding_lookup_1_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_1" + op: "Identity" + input: "clip_by_global_norm/mul_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/embedding_lookup_1_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1024 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_3" + op: "Mul" + input: "gradients/bert/embeddings/Slice_grad/Pad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/Slice_grad/Pad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_2" + op: "Identity" + input: "clip_by_global_norm/mul_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/Slice_grad/Pad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_4" + op: "Mul" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_3" + op: "Identity" + input: "clip_by_global_norm/mul_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_5" + op: "Mul" + input: "gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_4" + op: "Identity" + input: "clip_by_global_norm/mul_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/embeddings/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_6" + op: "Mul" + input: "gradients/bert/encoder/embedding_hidden_mapping_in/einsum/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/embedding_hidden_mapping_in/einsum/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_5" + op: "Identity" + input: "clip_by_global_norm/mul_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/embedding_hidden_mapping_in/einsum/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_7" + op: "Mul" + input: "gradients/bert/encoder/embedding_hidden_mapping_in/add_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/embedding_hidden_mapping_in/add_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_6" + op: "Identity" + input: "clip_by_global_norm/mul_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/embedding_hidden_mapping_in/add_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_8" + op: "Mul" + input: "gradients/AddN_102" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_7" + op: "Identity" + input: "clip_by_global_norm/mul_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_9" + op: "Mul" + input: "gradients/AddN_98" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_8" + op: "Identity" + input: "clip_by_global_norm/mul_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/query/Reshape_1_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_10" + op: "Mul" + input: "gradients/AddN_103" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_9" + op: "Identity" + input: "clip_by_global_norm/mul_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_11" + op: "Mul" + input: "gradients/AddN_99" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_10" + op: "Identity" + input: "clip_by_global_norm/mul_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/key/Reshape_1_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_12" + op: "Mul" + input: "gradients/AddN_100" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_11" + op: "Identity" + input: "clip_by_global_norm/mul_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_13" + op: "Mul" + input: "gradients/AddN_97" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_12" + op: "Identity" + input: "clip_by_global_norm/mul_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/self/value/Reshape_1_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_14" + op: "Mul" + input: "gradients/AddN_96" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_13" + op: "Identity" + input: "clip_by_global_norm/mul_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/Reshape_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_15" + op: "Mul" + input: "gradients/AddN_95" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/add_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_14" + op: "Identity" + input: "clip_by_global_norm/mul_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/attention_1/output/dense/add_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_16" + op: "Mul" + input: "gradients/AddN_91" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_15" + op: "Identity" + input: "clip_by_global_norm/mul_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_17" + op: "Mul" + input: "gradients/AddN_93" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_16" + op: "Identity" + input: "clip_by_global_norm/mul_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_18" + op: "Mul" + input: "gradients/AddN_89" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_17" + op: "Identity" + input: "clip_by_global_norm/mul_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/einsum/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_19" + op: "Mul" + input: "gradients/AddN_88" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_18" + op: "Identity" + input: "clip_by_global_norm/mul_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/dense/add_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_20" + op: "Mul" + input: "gradients/AddN_86" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_19" + op: "Identity" + input: "clip_by_global_norm/mul_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/einsum/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_21" + op: "Mul" + input: "gradients/AddN_85" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_20" + op: "Identity" + input: "clip_by_global_norm/mul_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/ffn_1/intermediate/output/dense/add_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_22" + op: "Mul" + input: "gradients/AddN_81" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_21" + op: "Identity" + input: "clip_by_global_norm/mul_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_23" + op: "Mul" + input: "gradients/AddN_83" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_22" + op: "Identity" + input: "clip_by_global_norm/mul_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/encoder/transformer/group_0_11/layer_11/inner_group_0/LayerNorm_1/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_24" + op: "Mul" + input: "gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_23" + op: "Identity" + input: "clip_by_global_norm/mul_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_25" + op: "Mul" + input: "gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_24" + op: "Identity" + input: "clip_by_global_norm/mul_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/bert/pooler/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_26" + op: "Mul" + input: "gradients/cls/predictions/transform/dense/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_25" + op: "Identity" + input: "clip_by_global_norm/mul_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/dense/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_27" + op: "Mul" + input: "gradients/cls/predictions/transform/dense/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_26" + op: "Identity" + input: "clip_by_global_norm/mul_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/dense/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_28" + op: "Mul" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Reshape" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_27" + op: "Identity" + input: "clip_by_global_norm/mul_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/batchnorm/sub_grad/Reshape" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_29" + op: "Mul" + input: "gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_28" + op: "Identity" + input: "clip_by_global_norm/mul_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/transform/LayerNorm/batchnorm/mul_grad/Reshape_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_30" + op: "Mul" + input: "gradients/cls/predictions/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_29" + op: "Identity" + input: "clip_by_global_norm/mul_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/predictions/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_31" + op: "Mul" + input: "gradients/cls/seq_relationship/MatMul_grad/MatMul_1" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_30" + op: "Identity" + input: "clip_by_global_norm/mul_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/MatMul_grad/MatMul_1" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/mul_32" + op: "Mul" + input: "gradients/cls/seq_relationship/BiasAdd_grad/BiasAddGrad" + input: "clip_by_global_norm/Select" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "clip_by_global_norm/clip_by_global_norm/_31" + op: "Identity" + input: "clip_by_global_norm/mul_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@gradients/cls/seq_relationship/BiasAdd_grad/BiasAddGrad" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\326\234\001\000\200\000\000\000" + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m/Assign" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_m" + input: "bert/embeddings/word_embeddings/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_m/read" + op: "Identity" + input: "bert/embeddings/word_embeddings/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\326\234\001\000\200\000\000\000" + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v/Assign" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_v" + input: "bert/embeddings/word_embeddings/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/word_embeddings/adam_v/read" + op: "Identity" + input: "bert/embeddings/word_embeddings/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_4/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_4" + op: "Mul" + input: "Mul_4/x" + input: "bert/embeddings/word_embeddings/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_5/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_5" + op: "Mul" + input: "Mul_5/x" + input: "clip_by_global_norm/clip_by_global_norm/_0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_3" + op: "AddV2" + input: "Mul_4" + input: "Mul_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_6/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_6" + op: "Mul" + input: "Mul_6/x" + input: "bert/embeddings/word_embeddings/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Square" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_7/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_7" + op: "Mul" + input: "Mul_7/x" + input: "Square" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_4" + op: "AddV2" + input: "Mul_6" + input: "Mul_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Sqrt" + op: "Sqrt" + input: "add_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_5/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_5" + op: "AddV2" + input: "Sqrt" + input: "add_5/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "truediv_1" + op: "RealDiv" + input: "add_3" + input: "add_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "mul_8/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_8" + op: "Mul" + input: "mul_8/x" + input: "bert/embeddings/word_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_6" + op: "AddV2" + input: "truediv_1" + input: "mul_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "norm/mul" + op: "Mul" + input: "bert/embeddings/word_embeddings/read" + input: "bert/embeddings/word_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "norm/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm/Sum" + op: "Sum" + input: "norm/mul" + input: "norm/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm/Sqrt" + op: "Sqrt" + input: "norm/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm/Squeeze" + op: "Squeeze" + input: "norm/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "norm_1/mul" + op: "Mul" + input: "add_6" + input: "add_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "norm_1/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_1/Sum" + op: "Sum" + input: "norm_1/mul" + input: "norm_1/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_1/Sqrt" + op: "Sqrt" + input: "norm_1/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_1/Squeeze" + op: "Squeeze" + input: "norm_1/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "Greater/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater" + op: "Greater" + input: "norm/Squeeze" + input: "Greater/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Greater_1/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_1" + op: "Greater" + input: "norm_1/Squeeze" + input: "Greater_1/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "truediv_2" + op: "RealDiv" + input: "norm/Squeeze" + input: "norm_1/Squeeze" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select" + op: "Select" + input: "Greater_1" + input: "truediv_2" + input: "Select/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_1/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_1" + op: "Select" + input: "Greater" + input: "Select" + input: "Select_1/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_9" + op: "Mul" + input: "Select_1" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_10" + op: "Mul" + input: "mul_9" + input: "add_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "sub_2" + op: "Sub" + input: "bert/embeddings/word_embeddings/read" + input: "mul_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Assign" + op: "Assign" + input: "bert/embeddings/word_embeddings" + input: "sub_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_1" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_m" + input: "add_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_2" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_v" + input: "add_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_m/Assign" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_m" + input: "bert/embeddings/token_type_embeddings/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_m/read" + op: "Identity" + input: "bert/embeddings/token_type_embeddings/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_v/Assign" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_v" + input: "bert/embeddings/token_type_embeddings/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/token_type_embeddings/adam_v/read" + op: "Identity" + input: "bert/embeddings/token_type_embeddings/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_11/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_11" + op: "Mul" + input: "Mul_11/x" + input: "bert/embeddings/token_type_embeddings/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_12/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_12/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "Mul_12/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "Mul_12/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "Mul_12/strided_slice" + op: "StridedSlice" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/Cast" + input: "Mul_12/strided_slice/stack" + input: "Mul_12/strided_slice/stack_1" + input: "Mul_12/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } +} +node { + name: "Mul_12/y" + op: "UnsortedSegmentSum" + input: "clip_by_global_norm/clip_by_global_norm/_1" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/Reshape_1" + input: "Mul_12/strided_slice" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tindices" + value { + type: DT_INT32 + } + } + attr { + key: "Tnumsegments" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_12" + op: "Mul" + input: "Mul_12/x" + input: "Mul_12/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_7" + op: "AddV2" + input: "Mul_11" + input: "Mul_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_13/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_13" + op: "Mul" + input: "Mul_13/x" + input: "bert/embeddings/token_type_embeddings/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Square_1/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "Square_1/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "Square_1/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "Square_1/strided_slice" + op: "StridedSlice" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/Cast" + input: "Square_1/strided_slice/stack" + input: "Square_1/strided_slice/stack_1" + input: "Square_1/strided_slice/stack_2" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 1 + } + } +} +node { + name: "Square_1/x" + op: "UnsortedSegmentSum" + input: "clip_by_global_norm/clip_by_global_norm/_1" + input: "gradients/bert/embeddings/embedding_lookup_1_grad/Reshape_1" + input: "Square_1/strided_slice" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tindices" + value { + type: DT_INT32 + } + } + attr { + key: "Tnumsegments" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Square_1" + op: "Square" + input: "Square_1/x" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_14/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_14" + op: "Mul" + input: "Mul_14/x" + input: "Square_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_8" + op: "AddV2" + input: "Mul_13" + input: "Mul_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Sqrt_1" + op: "Sqrt" + input: "add_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_9/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_9" + op: "AddV2" + input: "Sqrt_1" + input: "add_9/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "truediv_3" + op: "RealDiv" + input: "add_7" + input: "add_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "mul_15/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_15" + op: "Mul" + input: "mul_15/x" + input: "bert/embeddings/token_type_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_10" + op: "AddV2" + input: "truediv_3" + input: "mul_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "norm_2/mul" + op: "Mul" + input: "bert/embeddings/token_type_embeddings/read" + input: "bert/embeddings/token_type_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "norm_2/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_2/Sum" + op: "Sum" + input: "norm_2/mul" + input: "norm_2/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_2/Sqrt" + op: "Sqrt" + input: "norm_2/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_2/Squeeze" + op: "Squeeze" + input: "norm_2/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "norm_3/mul" + op: "Mul" + input: "add_10" + input: "add_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "norm_3/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_3/Sum" + op: "Sum" + input: "norm_3/mul" + input: "norm_3/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_3/Sqrt" + op: "Sqrt" + input: "norm_3/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_3/Squeeze" + op: "Squeeze" + input: "norm_3/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "Greater_2/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_2" + op: "Greater" + input: "norm_2/Squeeze" + input: "Greater_2/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Greater_3/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_3" + op: "Greater" + input: "norm_3/Squeeze" + input: "Greater_3/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "truediv_4" + op: "RealDiv" + input: "norm_2/Squeeze" + input: "norm_3/Squeeze" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_2/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_2" + op: "Select" + input: "Greater_3" + input: "truediv_4" + input: "Select_2/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_3/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_3" + op: "Select" + input: "Greater_2" + input: "Select_2" + input: "Select_3/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_16" + op: "Mul" + input: "Select_3" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_17" + op: "Mul" + input: "mul_16" + input: "add_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "sub_3" + op: "Sub" + input: "bert/embeddings/token_type_embeddings/read" + input: "mul_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Assign_3" + op: "Assign" + input: "bert/embeddings/token_type_embeddings" + input: "sub_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_4" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_m" + input: "add_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_5" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_v" + input: "add_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\002\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m/Assign" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_m" + input: "bert/embeddings/position_embeddings/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_m/read" + op: "Identity" + input: "bert/embeddings/position_embeddings/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\002\000\000\200\000\000\000" + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v/Assign" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_v" + input: "bert/embeddings/position_embeddings/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/position_embeddings/adam_v/read" + op: "Identity" + input: "bert/embeddings/position_embeddings/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_18/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_18" + op: "Mul" + input: "Mul_18/x" + input: "bert/embeddings/position_embeddings/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_19/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_19" + op: "Mul" + input: "Mul_19/x" + input: "clip_by_global_norm/clip_by_global_norm/_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_11" + op: "AddV2" + input: "Mul_18" + input: "Mul_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_20/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_20" + op: "Mul" + input: "Mul_20/x" + input: "bert/embeddings/position_embeddings/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Square_2" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_21/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_21" + op: "Mul" + input: "Mul_21/x" + input: "Square_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_12" + op: "AddV2" + input: "Mul_20" + input: "Mul_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Sqrt_2" + op: "Sqrt" + input: "add_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_13/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_13" + op: "AddV2" + input: "Sqrt_2" + input: "add_13/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "truediv_5" + op: "RealDiv" + input: "add_11" + input: "add_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "mul_22/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_22" + op: "Mul" + input: "mul_22/x" + input: "bert/embeddings/position_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_14" + op: "AddV2" + input: "truediv_5" + input: "mul_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "norm_4/mul" + op: "Mul" + input: "bert/embeddings/position_embeddings/read" + input: "bert/embeddings/position_embeddings/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "norm_4/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_4/Sum" + op: "Sum" + input: "norm_4/mul" + input: "norm_4/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_4/Sqrt" + op: "Sqrt" + input: "norm_4/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_4/Squeeze" + op: "Squeeze" + input: "norm_4/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "norm_5/mul" + op: "Mul" + input: "add_14" + input: "add_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "norm_5/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_5/Sum" + op: "Sum" + input: "norm_5/mul" + input: "norm_5/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_5/Sqrt" + op: "Sqrt" + input: "norm_5/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_5/Squeeze" + op: "Squeeze" + input: "norm_5/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "Greater_4/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_4" + op: "Greater" + input: "norm_4/Squeeze" + input: "Greater_4/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Greater_5/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_5" + op: "Greater" + input: "norm_5/Squeeze" + input: "Greater_5/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "truediv_6" + op: "RealDiv" + input: "norm_4/Squeeze" + input: "norm_5/Squeeze" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_4/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_4" + op: "Select" + input: "Greater_5" + input: "truediv_6" + input: "Select_4/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_5/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_5" + op: "Select" + input: "Greater_4" + input: "Select_4" + input: "Select_5/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_23" + op: "Mul" + input: "Select_5" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_24" + op: "Mul" + input: "mul_23" + input: "add_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "sub_4" + op: "Sub" + input: "bert/embeddings/position_embeddings/read" + input: "mul_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Assign_6" + op: "Assign" + input: "bert/embeddings/position_embeddings" + input: "sub_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_7" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_m" + input: "add_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_8" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_v" + input: "add_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 128 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_m" + input: "bert/embeddings/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 128 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_v" + input: "bert/embeddings/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_25/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_25" + op: "Mul" + input: "Mul_25/x" + input: "bert/embeddings/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_26/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_26" + op: "Mul" + input: "Mul_26/x" + input: "clip_by_global_norm/clip_by_global_norm/_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_15" + op: "AddV2" + input: "Mul_25" + input: "Mul_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_27/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_27" + op: "Mul" + input: "Mul_27/x" + input: "bert/embeddings/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Square_3" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_28/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_28" + op: "Mul" + input: "Mul_28/x" + input: "Square_3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_16" + op: "AddV2" + input: "Mul_27" + input: "Mul_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Sqrt_3" + op: "Sqrt" + input: "add_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_17/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_17" + op: "AddV2" + input: "Sqrt_3" + input: "add_17/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "truediv_7" + op: "RealDiv" + input: "add_15" + input: "add_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "mul_29/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_29" + op: "Mul" + input: "mul_29/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_30" + op: "Mul" + input: "mul_29" + input: "truediv_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "sub_5" + op: "Sub" + input: "bert/embeddings/LayerNorm/beta/read" + input: "mul_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Assign_9" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta" + input: "sub_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_10" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_m" + input: "add_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_11" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_v" + input: "add_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 128 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + input: "bert/embeddings/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 128 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + input: "bert/embeddings/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/embeddings/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_31/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_31" + op: "Mul" + input: "Mul_31/x" + input: "bert/embeddings/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_32/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_32" + op: "Mul" + input: "Mul_32/x" + input: "clip_by_global_norm/clip_by_global_norm/_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_18" + op: "AddV2" + input: "Mul_31" + input: "Mul_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_33/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_33" + op: "Mul" + input: "Mul_33/x" + input: "bert/embeddings/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Square_4" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_34/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_34" + op: "Mul" + input: "Mul_34/x" + input: "Square_4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_19" + op: "AddV2" + input: "Mul_33" + input: "Mul_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Sqrt_4" + op: "Sqrt" + input: "add_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_20/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_20" + op: "AddV2" + input: "Sqrt_4" + input: "add_20/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "truediv_8" + op: "RealDiv" + input: "add_18" + input: "add_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "mul_35/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_35" + op: "Mul" + input: "mul_35/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_36" + op: "Mul" + input: "mul_35" + input: "truediv_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "sub_6" + op: "Sub" + input: "bert/embeddings/LayerNorm/gamma/read" + input: "mul_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Assign_12" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma" + input: "sub_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_13" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + input: "add_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_14" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + input: "add_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\200\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_37/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_37" + op: "Mul" + input: "Mul_37/x" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_38/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_38" + op: "Mul" + input: "Mul_38/x" + input: "clip_by_global_norm/clip_by_global_norm/_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_21" + op: "AddV2" + input: "Mul_37" + input: "Mul_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_39/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_39" + op: "Mul" + input: "Mul_39/x" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_5" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_40/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_40" + op: "Mul" + input: "Mul_40/x" + input: "Square_5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_22" + op: "AddV2" + input: "Mul_39" + input: "Mul_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_5" + op: "Sqrt" + input: "add_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_23/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_23" + op: "AddV2" + input: "Sqrt_5" + input: "add_23/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_9" + op: "RealDiv" + input: "add_21" + input: "add_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_41/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_41" + op: "Mul" + input: "mul_41/x" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_24" + op: "AddV2" + input: "truediv_9" + input: "mul_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_6/mul" + op: "Mul" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/read" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_6/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_6/Sum" + op: "Sum" + input: "norm_6/mul" + input: "norm_6/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_6/Sqrt" + op: "Sqrt" + input: "norm_6/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_6/Squeeze" + op: "Squeeze" + input: "norm_6/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "norm_7/mul" + op: "Mul" + input: "add_24" + input: "add_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_7/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_7/Sum" + op: "Sum" + input: "norm_7/mul" + input: "norm_7/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_7/Sqrt" + op: "Sqrt" + input: "norm_7/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_7/Squeeze" + op: "Squeeze" + input: "norm_7/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "Greater_6/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_6" + op: "Greater" + input: "norm_6/Squeeze" + input: "Greater_6/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Greater_7/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_7" + op: "Greater" + input: "norm_7/Squeeze" + input: "Greater_7/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "truediv_10" + op: "RealDiv" + input: "norm_6/Squeeze" + input: "norm_7/Squeeze" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_6/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_6" + op: "Select" + input: "Greater_7" + input: "truediv_10" + input: "Select_6/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_7/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_7" + op: "Select" + input: "Greater_6" + input: "Select_6" + input: "Select_7/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_42" + op: "Mul" + input: "Select_7" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_43" + op: "Mul" + input: "mul_42" + input: "add_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_7" + op: "Sub" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/read" + input: "mul_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_15" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/kernel" + input: "sub_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_16" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + input: "add_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_17" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + input: "add_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/embedding_hidden_mapping_in/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_44/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_44" + op: "Mul" + input: "Mul_44/x" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_45/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_45" + op: "Mul" + input: "Mul_45/x" + input: "clip_by_global_norm/clip_by_global_norm/_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_25" + op: "AddV2" + input: "Mul_44" + input: "Mul_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_46/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_46" + op: "Mul" + input: "Mul_46/x" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_6" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_47/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_47" + op: "Mul" + input: "Mul_47/x" + input: "Square_6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_26" + op: "AddV2" + input: "Mul_46" + input: "Mul_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_6" + op: "Sqrt" + input: "add_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_27/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_27" + op: "AddV2" + input: "Sqrt_6" + input: "add_27/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_11" + op: "RealDiv" + input: "add_25" + input: "add_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_48/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_48" + op: "Mul" + input: "mul_48/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_49" + op: "Mul" + input: "mul_48" + input: "truediv_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_8" + op: "Sub" + input: "bert/encoder/embedding_hidden_mapping_in/bias/read" + input: "mul_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_18" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/bias" + input: "sub_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_19" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + input: "add_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_20" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + input: "add_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_50/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_50" + op: "Mul" + input: "Mul_50/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_51/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_51" + op: "Mul" + input: "Mul_51/x" + input: "clip_by_global_norm/clip_by_global_norm/_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_28" + op: "AddV2" + input: "Mul_50" + input: "Mul_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_52/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_52" + op: "Mul" + input: "Mul_52/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_7" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_53/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_53" + op: "Mul" + input: "Mul_53/x" + input: "Square_7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_29" + op: "AddV2" + input: "Mul_52" + input: "Mul_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_7" + op: "Sqrt" + input: "add_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_30/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_30" + op: "AddV2" + input: "Sqrt_7" + input: "add_30/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_12" + op: "RealDiv" + input: "add_28" + input: "add_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_54/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_54" + op: "Mul" + input: "mul_54/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_31" + op: "AddV2" + input: "truediv_12" + input: "mul_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_8/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/read" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_8/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_8/Sum" + op: "Sum" + input: "norm_8/mul" + input: "norm_8/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_8/Sqrt" + op: "Sqrt" + input: "norm_8/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_8/Squeeze" + op: "Squeeze" + input: "norm_8/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "norm_9/mul" + op: "Mul" + input: "add_31" + input: "add_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_9/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_9/Sum" + op: "Sum" + input: "norm_9/mul" + input: "norm_9/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_9/Sqrt" + op: "Sqrt" + input: "norm_9/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_9/Squeeze" + op: "Squeeze" + input: "norm_9/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "Greater_8/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_8" + op: "Greater" + input: "norm_8/Squeeze" + input: "Greater_8/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Greater_9/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_9" + op: "Greater" + input: "norm_9/Squeeze" + input: "Greater_9/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "truediv_13" + op: "RealDiv" + input: "norm_8/Squeeze" + input: "norm_9/Squeeze" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_8/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_8" + op: "Select" + input: "Greater_9" + input: "truediv_13" + input: "Select_8/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_9/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_9" + op: "Select" + input: "Greater_8" + input: "Select_8" + input: "Select_9/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_55" + op: "Mul" + input: "Select_9" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_56" + op: "Mul" + input: "mul_55" + input: "add_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_9" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/read" + input: "mul_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_21" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + input: "sub_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_22" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + input: "add_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_23" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + input: "add_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_57/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_57" + op: "Mul" + input: "Mul_57/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_58/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_58" + op: "Mul" + input: "Mul_58/x" + input: "clip_by_global_norm/clip_by_global_norm/_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_32" + op: "AddV2" + input: "Mul_57" + input: "Mul_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_59/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_59" + op: "Mul" + input: "Mul_59/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_8" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_60/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_60" + op: "Mul" + input: "Mul_60/x" + input: "Square_8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_33" + op: "AddV2" + input: "Mul_59" + input: "Mul_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_8" + op: "Sqrt" + input: "add_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_34/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_34" + op: "AddV2" + input: "Sqrt_8" + input: "add_34/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_14" + op: "RealDiv" + input: "add_32" + input: "add_34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_61/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_61" + op: "Mul" + input: "mul_61/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_62" + op: "Mul" + input: "mul_61" + input: "truediv_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_10" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/read" + input: "mul_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_24" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + input: "sub_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_25" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + input: "add_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_26" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + input: "add_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_63/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_63" + op: "Mul" + input: "Mul_63/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_64/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_64" + op: "Mul" + input: "Mul_64/x" + input: "clip_by_global_norm/clip_by_global_norm/_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_35" + op: "AddV2" + input: "Mul_63" + input: "Mul_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_65/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_65" + op: "Mul" + input: "Mul_65/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_9" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_66/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_66" + op: "Mul" + input: "Mul_66/x" + input: "Square_9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_36" + op: "AddV2" + input: "Mul_65" + input: "Mul_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_9" + op: "Sqrt" + input: "add_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_37/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_37" + op: "AddV2" + input: "Sqrt_9" + input: "add_37/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_15" + op: "RealDiv" + input: "add_35" + input: "add_37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_67/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_67" + op: "Mul" + input: "mul_67/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_38" + op: "AddV2" + input: "truediv_15" + input: "mul_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_10/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/read" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_10/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_10/Sum" + op: "Sum" + input: "norm_10/mul" + input: "norm_10/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_10/Sqrt" + op: "Sqrt" + input: "norm_10/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_10/Squeeze" + op: "Squeeze" + input: "norm_10/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "norm_11/mul" + op: "Mul" + input: "add_38" + input: "add_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_11/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_11/Sum" + op: "Sum" + input: "norm_11/mul" + input: "norm_11/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_11/Sqrt" + op: "Sqrt" + input: "norm_11/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_11/Squeeze" + op: "Squeeze" + input: "norm_11/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "Greater_10/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_10" + op: "Greater" + input: "norm_10/Squeeze" + input: "Greater_10/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Greater_11/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_11" + op: "Greater" + input: "norm_11/Squeeze" + input: "Greater_11/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "truediv_16" + op: "RealDiv" + input: "norm_10/Squeeze" + input: "norm_11/Squeeze" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_10/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_10" + op: "Select" + input: "Greater_11" + input: "truediv_16" + input: "Select_10/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_11/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_11" + op: "Select" + input: "Greater_10" + input: "Select_10" + input: "Select_11/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_68" + op: "Mul" + input: "Select_11" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_69" + op: "Mul" + input: "mul_68" + input: "add_38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_11" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/read" + input: "mul_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_27" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + input: "sub_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_28" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + input: "add_35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_29" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + input: "add_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_70/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_70" + op: "Mul" + input: "Mul_70/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_71/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_71" + op: "Mul" + input: "Mul_71/x" + input: "clip_by_global_norm/clip_by_global_norm/_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_39" + op: "AddV2" + input: "Mul_70" + input: "Mul_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_72/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_72" + op: "Mul" + input: "Mul_72/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_10" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_73/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_73" + op: "Mul" + input: "Mul_73/x" + input: "Square_10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_40" + op: "AddV2" + input: "Mul_72" + input: "Mul_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_10" + op: "Sqrt" + input: "add_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_41/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_41" + op: "AddV2" + input: "Sqrt_10" + input: "add_41/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_17" + op: "RealDiv" + input: "add_39" + input: "add_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_74/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_74" + op: "Mul" + input: "mul_74/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_75" + op: "Mul" + input: "mul_74" + input: "truediv_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_12" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/read" + input: "mul_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_30" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + input: "sub_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_31" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + input: "add_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_32" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + input: "add_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_76/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_76" + op: "Mul" + input: "Mul_76/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_77/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_77" + op: "Mul" + input: "Mul_77/x" + input: "clip_by_global_norm/clip_by_global_norm/_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_42" + op: "AddV2" + input: "Mul_76" + input: "Mul_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_78/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_78" + op: "Mul" + input: "Mul_78/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_11" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_79/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_79" + op: "Mul" + input: "Mul_79/x" + input: "Square_11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_43" + op: "AddV2" + input: "Mul_78" + input: "Mul_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_11" + op: "Sqrt" + input: "add_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_44/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_44" + op: "AddV2" + input: "Sqrt_11" + input: "add_44/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_18" + op: "RealDiv" + input: "add_42" + input: "add_44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_80/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_80" + op: "Mul" + input: "mul_80/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_45" + op: "AddV2" + input: "truediv_18" + input: "mul_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_12/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/read" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_12/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_12/Sum" + op: "Sum" + input: "norm_12/mul" + input: "norm_12/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_12/Sqrt" + op: "Sqrt" + input: "norm_12/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_12/Squeeze" + op: "Squeeze" + input: "norm_12/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "norm_13/mul" + op: "Mul" + input: "add_45" + input: "add_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_13/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_13/Sum" + op: "Sum" + input: "norm_13/mul" + input: "norm_13/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_13/Sqrt" + op: "Sqrt" + input: "norm_13/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_13/Squeeze" + op: "Squeeze" + input: "norm_13/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "Greater_12/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_12" + op: "Greater" + input: "norm_12/Squeeze" + input: "Greater_12/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Greater_13/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_13" + op: "Greater" + input: "norm_13/Squeeze" + input: "Greater_13/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "truediv_19" + op: "RealDiv" + input: "norm_12/Squeeze" + input: "norm_13/Squeeze" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_12/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_12" + op: "Select" + input: "Greater_13" + input: "truediv_19" + input: "Select_12/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_13/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_13" + op: "Select" + input: "Greater_12" + input: "Select_12" + input: "Select_13/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_81" + op: "Mul" + input: "Select_13" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_82" + op: "Mul" + input: "mul_81" + input: "add_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_13" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/read" + input: "mul_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_33" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + input: "sub_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_34" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + input: "add_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_35" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + input: "add_43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_83/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_83" + op: "Mul" + input: "Mul_83/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_84/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_84" + op: "Mul" + input: "Mul_84/x" + input: "clip_by_global_norm/clip_by_global_norm/_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_46" + op: "AddV2" + input: "Mul_83" + input: "Mul_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_85/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_85" + op: "Mul" + input: "Mul_85/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_12" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_86/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_86" + op: "Mul" + input: "Mul_86/x" + input: "Square_12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_47" + op: "AddV2" + input: "Mul_85" + input: "Mul_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_12" + op: "Sqrt" + input: "add_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_48/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_48" + op: "AddV2" + input: "Sqrt_12" + input: "add_48/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_20" + op: "RealDiv" + input: "add_46" + input: "add_48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_87/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_87" + op: "Mul" + input: "mul_87/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_88" + op: "Mul" + input: "mul_87" + input: "truediv_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_14" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/read" + input: "mul_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_36" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + input: "sub_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_37" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + input: "add_46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_38" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + input: "add_47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_89/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_89" + op: "Mul" + input: "Mul_89/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_90/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_90" + op: "Mul" + input: "Mul_90/x" + input: "clip_by_global_norm/clip_by_global_norm/_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_49" + op: "AddV2" + input: "Mul_89" + input: "Mul_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_91/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_91" + op: "Mul" + input: "Mul_91/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_13" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_92/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_92" + op: "Mul" + input: "Mul_92/x" + input: "Square_13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_50" + op: "AddV2" + input: "Mul_91" + input: "Mul_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_13" + op: "Sqrt" + input: "add_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_51/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_51" + op: "AddV2" + input: "Sqrt_13" + input: "add_51/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_21" + op: "RealDiv" + input: "add_49" + input: "add_51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_93/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_93" + op: "Mul" + input: "mul_93/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_52" + op: "AddV2" + input: "truediv_21" + input: "mul_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_14/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/read" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_14/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_14/Sum" + op: "Sum" + input: "norm_14/mul" + input: "norm_14/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_14/Sqrt" + op: "Sqrt" + input: "norm_14/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_14/Squeeze" + op: "Squeeze" + input: "norm_14/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "norm_15/mul" + op: "Mul" + input: "add_52" + input: "add_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_15/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_15/Sum" + op: "Sum" + input: "norm_15/mul" + input: "norm_15/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_15/Sqrt" + op: "Sqrt" + input: "norm_15/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_15/Squeeze" + op: "Squeeze" + input: "norm_15/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "Greater_14/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_14" + op: "Greater" + input: "norm_14/Squeeze" + input: "Greater_14/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Greater_15/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_15" + op: "Greater" + input: "norm_15/Squeeze" + input: "Greater_15/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "truediv_22" + op: "RealDiv" + input: "norm_14/Squeeze" + input: "norm_15/Squeeze" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_14/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_14" + op: "Select" + input: "Greater_15" + input: "truediv_22" + input: "Select_14/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_15/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_15" + op: "Select" + input: "Greater_14" + input: "Select_14" + input: "Select_15/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_94" + op: "Mul" + input: "Select_15" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_95" + op: "Mul" + input: "mul_94" + input: "add_52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_15" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/read" + input: "mul_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_39" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + input: "sub_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_40" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + input: "add_49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_41" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + input: "add_50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_96/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_96" + op: "Mul" + input: "Mul_96/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_97/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_97" + op: "Mul" + input: "Mul_97/x" + input: "clip_by_global_norm/clip_by_global_norm/_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_53" + op: "AddV2" + input: "Mul_96" + input: "Mul_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_98/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_98" + op: "Mul" + input: "Mul_98/x" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_14" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_99/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_99" + op: "Mul" + input: "Mul_99/x" + input: "Square_14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_54" + op: "AddV2" + input: "Mul_98" + input: "Mul_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_14" + op: "Sqrt" + input: "add_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_55/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_55" + op: "AddV2" + input: "Sqrt_14" + input: "add_55/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_23" + op: "RealDiv" + input: "add_53" + input: "add_55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_100/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_100" + op: "Mul" + input: "mul_100/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_101" + op: "Mul" + input: "mul_100" + input: "truediv_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_16" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/read" + input: "mul_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_42" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + input: "sub_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_43" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + input: "add_53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_44" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + input: "add_54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_102/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_102" + op: "Mul" + input: "Mul_102/x" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_103/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_103" + op: "Mul" + input: "Mul_103/x" + input: "clip_by_global_norm/clip_by_global_norm/_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_56" + op: "AddV2" + input: "Mul_102" + input: "Mul_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_104/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_104" + op: "Mul" + input: "Mul_104/x" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_15" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_105/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_105" + op: "Mul" + input: "Mul_105/x" + input: "Square_15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_57" + op: "AddV2" + input: "Mul_104" + input: "Mul_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_15" + op: "Sqrt" + input: "add_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_58/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_58" + op: "AddV2" + input: "Sqrt_15" + input: "add_58/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_24" + op: "RealDiv" + input: "add_56" + input: "add_58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_106/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_106" + op: "Mul" + input: "mul_106/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_107" + op: "Mul" + input: "mul_106" + input: "truediv_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_17" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/read" + input: "mul_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_45" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + input: "sub_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_46" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + input: "add_56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_47" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + input: "add_57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_108/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_108" + op: "Mul" + input: "Mul_108/x" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_109/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_109" + op: "Mul" + input: "Mul_109/x" + input: "clip_by_global_norm/clip_by_global_norm/_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_59" + op: "AddV2" + input: "Mul_108" + input: "Mul_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_110/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_110" + op: "Mul" + input: "Mul_110/x" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_16" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_111/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_111" + op: "Mul" + input: "Mul_111/x" + input: "Square_16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_60" + op: "AddV2" + input: "Mul_110" + input: "Mul_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_16" + op: "Sqrt" + input: "add_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_61/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_61" + op: "AddV2" + input: "Sqrt_16" + input: "add_61/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_25" + op: "RealDiv" + input: "add_59" + input: "add_61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_112/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_112" + op: "Mul" + input: "mul_112/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_113" + op: "Mul" + input: "mul_112" + input: "truediv_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_18" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/read" + input: "mul_113" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_48" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + input: "sub_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_49" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + input: "add_59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_50" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + input: "add_60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\014\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_114/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_114" + op: "Mul" + input: "Mul_114/x" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_115/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_115" + op: "Mul" + input: "Mul_115/x" + input: "clip_by_global_norm/clip_by_global_norm/_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_62" + op: "AddV2" + input: "Mul_114" + input: "Mul_115" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_116/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_116" + op: "Mul" + input: "Mul_116/x" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_17" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_117/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_117" + op: "Mul" + input: "Mul_117/x" + input: "Square_17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_63" + op: "AddV2" + input: "Mul_116" + input: "Mul_117" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_17" + op: "Sqrt" + input: "add_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_64/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_64" + op: "AddV2" + input: "Sqrt_17" + input: "add_64/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_26" + op: "RealDiv" + input: "add_62" + input: "add_64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_118/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_118" + op: "Mul" + input: "mul_118/x" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_65" + op: "AddV2" + input: "truediv_26" + input: "mul_118" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "norm_16/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "norm_16/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_16/Sum" + op: "Sum" + input: "norm_16/mul" + input: "norm_16/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_16/Sqrt" + op: "Sqrt" + input: "norm_16/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_16/Squeeze" + op: "Squeeze" + input: "norm_16/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "norm_17/mul" + op: "Mul" + input: "add_65" + input: "add_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "norm_17/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_17/Sum" + op: "Sum" + input: "norm_17/mul" + input: "norm_17/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_17/Sqrt" + op: "Sqrt" + input: "norm_17/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_17/Squeeze" + op: "Squeeze" + input: "norm_17/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "Greater_16/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_16" + op: "Greater" + input: "norm_16/Squeeze" + input: "Greater_16/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Greater_17/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_17" + op: "Greater" + input: "norm_17/Squeeze" + input: "Greater_17/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "truediv_27" + op: "RealDiv" + input: "norm_16/Squeeze" + input: "norm_17/Squeeze" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_16/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_16" + op: "Select" + input: "Greater_17" + input: "truediv_27" + input: "Select_16/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_17/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_17" + op: "Select" + input: "Greater_16" + input: "Select_16" + input: "Select_17/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_119" + op: "Mul" + input: "Select_17" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_120" + op: "Mul" + input: "mul_119" + input: "add_65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_19" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/read" + input: "mul_120" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_51" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + input: "sub_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_52" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + input: "add_62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_53" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + input: "add_63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 3072 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_121/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_121" + op: "Mul" + input: "Mul_121/x" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_122/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_122" + op: "Mul" + input: "Mul_122/x" + input: "clip_by_global_norm/clip_by_global_norm/_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_66" + op: "AddV2" + input: "Mul_121" + input: "Mul_122" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_123/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_123" + op: "Mul" + input: "Mul_123/x" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Square_18" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Mul_124/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_124" + op: "Mul" + input: "Mul_124/x" + input: "Square_18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_67" + op: "AddV2" + input: "Mul_123" + input: "Mul_124" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Sqrt_18" + op: "Sqrt" + input: "add_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "add_68/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_68" + op: "AddV2" + input: "Sqrt_18" + input: "add_68/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "truediv_28" + op: "RealDiv" + input: "add_66" + input: "add_68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "mul_125/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_125" + op: "Mul" + input: "mul_125/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_126" + op: "Mul" + input: "mul_125" + input: "truediv_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "sub_20" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/read" + input: "mul_126" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } +} +node { + name: "Assign_54" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + input: "sub_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_55" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + input: "add_66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_56" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + input: "add_67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\014\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_127/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_127" + op: "Mul" + input: "Mul_127/x" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_128/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_128" + op: "Mul" + input: "Mul_128/x" + input: "clip_by_global_norm/clip_by_global_norm/_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_69" + op: "AddV2" + input: "Mul_127" + input: "Mul_128" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_129/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_129" + op: "Mul" + input: "Mul_129/x" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_19" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_130/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_130" + op: "Mul" + input: "Mul_130/x" + input: "Square_19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_70" + op: "AddV2" + input: "Mul_129" + input: "Mul_130" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_19" + op: "Sqrt" + input: "add_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_71/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_71" + op: "AddV2" + input: "Sqrt_19" + input: "add_71/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_29" + op: "RealDiv" + input: "add_69" + input: "add_71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_131/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_131" + op: "Mul" + input: "mul_131/x" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_72" + op: "AddV2" + input: "truediv_29" + input: "mul_131" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_18/mul" + op: "Mul" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_18/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_18/Sum" + op: "Sum" + input: "norm_18/mul" + input: "norm_18/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_18/Sqrt" + op: "Sqrt" + input: "norm_18/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_18/Squeeze" + op: "Squeeze" + input: "norm_18/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "norm_19/mul" + op: "Mul" + input: "add_72" + input: "add_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_19/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_19/Sum" + op: "Sum" + input: "norm_19/mul" + input: "norm_19/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_19/Sqrt" + op: "Sqrt" + input: "norm_19/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_19/Squeeze" + op: "Squeeze" + input: "norm_19/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "Greater_18/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_18" + op: "Greater" + input: "norm_18/Squeeze" + input: "Greater_18/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Greater_19/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_19" + op: "Greater" + input: "norm_19/Squeeze" + input: "Greater_19/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "truediv_30" + op: "RealDiv" + input: "norm_18/Squeeze" + input: "norm_19/Squeeze" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_18/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_18" + op: "Select" + input: "Greater_19" + input: "truediv_30" + input: "Select_18/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_19/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_19" + op: "Select" + input: "Greater_18" + input: "Select_18" + input: "Select_19/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_132" + op: "Mul" + input: "Select_19" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_133" + op: "Mul" + input: "mul_132" + input: "add_72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_21" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/read" + input: "mul_133" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_57" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + input: "sub_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_58" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + input: "add_69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_59" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + input: "add_70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_134/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_134" + op: "Mul" + input: "Mul_134/x" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_135/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_135" + op: "Mul" + input: "Mul_135/x" + input: "clip_by_global_norm/clip_by_global_norm/_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_73" + op: "AddV2" + input: "Mul_134" + input: "Mul_135" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_136/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_136" + op: "Mul" + input: "Mul_136/x" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_20" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_137/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_137" + op: "Mul" + input: "Mul_137/x" + input: "Square_20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_74" + op: "AddV2" + input: "Mul_136" + input: "Mul_137" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_20" + op: "Sqrt" + input: "add_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_75/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_75" + op: "AddV2" + input: "Sqrt_20" + input: "add_75/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_31" + op: "RealDiv" + input: "add_73" + input: "add_75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_138/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_138" + op: "Mul" + input: "mul_138/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_139" + op: "Mul" + input: "mul_138" + input: "truediv_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_22" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/read" + input: "mul_139" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_60" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + input: "sub_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_61" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + input: "add_73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_62" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + input: "add_74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_140/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_140" + op: "Mul" + input: "Mul_140/x" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_141/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_141" + op: "Mul" + input: "Mul_141/x" + input: "clip_by_global_norm/clip_by_global_norm/_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_76" + op: "AddV2" + input: "Mul_140" + input: "Mul_141" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_142/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_142" + op: "Mul" + input: "Mul_142/x" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_21" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_143/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_143" + op: "Mul" + input: "Mul_143/x" + input: "Square_21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_77" + op: "AddV2" + input: "Mul_142" + input: "Mul_143" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_21" + op: "Sqrt" + input: "add_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_78/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_78" + op: "AddV2" + input: "Sqrt_21" + input: "add_78/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_32" + op: "RealDiv" + input: "add_76" + input: "add_78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_144/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_144" + op: "Mul" + input: "mul_144/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_145" + op: "Mul" + input: "mul_144" + input: "truediv_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_23" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/read" + input: "mul_145" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_63" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + input: "sub_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_64" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + input: "add_76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_65" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + input: "add_77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v/Assign" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v/read" + op: "Identity" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_146/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_146" + op: "Mul" + input: "Mul_146/x" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_147/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_147" + op: "Mul" + input: "Mul_147/x" + input: "clip_by_global_norm/clip_by_global_norm/_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_79" + op: "AddV2" + input: "Mul_146" + input: "Mul_147" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_148/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_148" + op: "Mul" + input: "Mul_148/x" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_22" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_149/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_149" + op: "Mul" + input: "Mul_149/x" + input: "Square_22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_80" + op: "AddV2" + input: "Mul_148" + input: "Mul_149" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_22" + op: "Sqrt" + input: "add_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_81/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_81" + op: "AddV2" + input: "Sqrt_22" + input: "add_81/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_33" + op: "RealDiv" + input: "add_79" + input: "add_81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_150/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_150" + op: "Mul" + input: "mul_150/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_151" + op: "Mul" + input: "mul_150" + input: "truediv_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_24" + op: "Sub" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/read" + input: "mul_151" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_66" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + input: "sub_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_67" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + input: "add_79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_68" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + input: "add_80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "bert/pooler/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "bert/pooler/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m/Assign" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_m" + input: "bert/pooler/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_m/read" + op: "Identity" + input: "bert/pooler/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\000\003\000\000" + } + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "bert/pooler/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "bert/pooler/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v/Assign" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_v" + input: "bert/pooler/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/kernel/adam_v/read" + op: "Identity" + input: "bert/pooler/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_152/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_152" + op: "Mul" + input: "Mul_152/x" + input: "bert/pooler/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_153/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_153" + op: "Mul" + input: "Mul_153/x" + input: "clip_by_global_norm/clip_by_global_norm/_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_82" + op: "AddV2" + input: "Mul_152" + input: "Mul_153" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_154/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_154" + op: "Mul" + input: "Mul_154/x" + input: "bert/pooler/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_23" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_155/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_155" + op: "Mul" + input: "Mul_155/x" + input: "Square_23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_83" + op: "AddV2" + input: "Mul_154" + input: "Mul_155" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_23" + op: "Sqrt" + input: "add_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_84/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_84" + op: "AddV2" + input: "Sqrt_23" + input: "add_84/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_34" + op: "RealDiv" + input: "add_82" + input: "add_84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_156/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_156" + op: "Mul" + input: "mul_156/x" + input: "bert/pooler/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_85" + op: "AddV2" + input: "truediv_34" + input: "mul_156" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_20/mul" + op: "Mul" + input: "bert/pooler/dense/kernel/read" + input: "bert/pooler/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_20/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_20/Sum" + op: "Sum" + input: "norm_20/mul" + input: "norm_20/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_20/Sqrt" + op: "Sqrt" + input: "norm_20/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_20/Squeeze" + op: "Squeeze" + input: "norm_20/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "norm_21/mul" + op: "Mul" + input: "add_85" + input: "add_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_21/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_21/Sum" + op: "Sum" + input: "norm_21/mul" + input: "norm_21/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_21/Sqrt" + op: "Sqrt" + input: "norm_21/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_21/Squeeze" + op: "Squeeze" + input: "norm_21/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "Greater_20/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_20" + op: "Greater" + input: "norm_20/Squeeze" + input: "Greater_20/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Greater_21/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_21" + op: "Greater" + input: "norm_21/Squeeze" + input: "Greater_21/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "truediv_35" + op: "RealDiv" + input: "norm_20/Squeeze" + input: "norm_21/Squeeze" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_20/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_20" + op: "Select" + input: "Greater_21" + input: "truediv_35" + input: "Select_20/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_21/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_21" + op: "Select" + input: "Greater_20" + input: "Select_20" + input: "Select_21/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_157" + op: "Mul" + input: "Select_21" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_158" + op: "Mul" + input: "mul_157" + input: "add_85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_25" + op: "Sub" + input: "bert/pooler/dense/kernel/read" + input: "mul_158" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_69" + op: "Assign" + input: "bert/pooler/dense/kernel" + input: "sub_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_70" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_m" + input: "add_82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_71" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_v" + input: "add_83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/bias/adam_m/Assign" + op: "Assign" + input: "bert/pooler/dense/bias/adam_m" + input: "bert/pooler/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/bias/adam_m/read" + op: "Identity" + input: "bert/pooler/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "bert/pooler/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 768 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "bert/pooler/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "bert/pooler/dense/bias/adam_v/Assign" + op: "Assign" + input: "bert/pooler/dense/bias/adam_v" + input: "bert/pooler/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "bert/pooler/dense/bias/adam_v/read" + op: "Identity" + input: "bert/pooler/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_159/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_159" + op: "Mul" + input: "Mul_159/x" + input: "bert/pooler/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_160/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_160" + op: "Mul" + input: "Mul_160/x" + input: "clip_by_global_norm/clip_by_global_norm/_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_86" + op: "AddV2" + input: "Mul_159" + input: "Mul_160" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_161/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_161" + op: "Mul" + input: "Mul_161/x" + input: "bert/pooler/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_24" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_162/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_162" + op: "Mul" + input: "Mul_162/x" + input: "Square_24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_87" + op: "AddV2" + input: "Mul_161" + input: "Mul_162" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_24" + op: "Sqrt" + input: "add_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_88/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_88" + op: "AddV2" + input: "Sqrt_24" + input: "add_88/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_36" + op: "RealDiv" + input: "add_86" + input: "add_88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_163/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_163" + op: "Mul" + input: "mul_163/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_164" + op: "Mul" + input: "mul_163" + input: "truediv_36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_26" + op: "Sub" + input: "bert/pooler/dense/bias/read" + input: "mul_164" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_72" + op: "Assign" + input: "bert/pooler/dense/bias" + input: "sub_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_73" + op: "Assign" + input: "bert/pooler/dense/bias/adam_m" + input: "add_86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_74" + op: "Assign" + input: "bert/pooler/dense/bias/adam_v" + input: "add_87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\200\000\000\000" + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_m/Initializer/zeros" + op: "Fill" + input: "cls/predictions/transform/dense/kernel/adam_m/Initializer/zeros/shape_as_tensor" + input: "cls/predictions/transform/dense/kernel/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_m/Assign" + op: "Assign" + input: "cls/predictions/transform/dense/kernel/adam_m" + input: "cls/predictions/transform/dense/kernel/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_m/read" + op: "Identity" + input: "cls/predictions/transform/dense/kernel/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\003\000\000\200\000\000\000" + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_v/Initializer/zeros" + op: "Fill" + input: "cls/predictions/transform/dense/kernel/adam_v/Initializer/zeros/shape_as_tensor" + input: "cls/predictions/transform/dense/kernel/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_v/Assign" + op: "Assign" + input: "cls/predictions/transform/dense/kernel/adam_v" + input: "cls/predictions/transform/dense/kernel/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/dense/kernel/adam_v/read" + op: "Identity" + input: "cls/predictions/transform/dense/kernel/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_165/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_165" + op: "Mul" + input: "Mul_165/x" + input: "cls/predictions/transform/dense/kernel/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_166/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_166" + op: "Mul" + input: "Mul_166/x" + input: "clip_by_global_norm/clip_by_global_norm/_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_89" + op: "AddV2" + input: "Mul_165" + input: "Mul_166" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_167/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_167" + op: "Mul" + input: "Mul_167/x" + input: "cls/predictions/transform/dense/kernel/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Square_25" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_168/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_168" + op: "Mul" + input: "Mul_168/x" + input: "Square_25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_90" + op: "AddV2" + input: "Mul_167" + input: "Mul_168" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Sqrt_25" + op: "Sqrt" + input: "add_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_91/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_91" + op: "AddV2" + input: "Sqrt_25" + input: "add_91/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "truediv_37" + op: "RealDiv" + input: "add_89" + input: "add_91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "mul_169/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_169" + op: "Mul" + input: "mul_169/x" + input: "cls/predictions/transform/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_92" + op: "AddV2" + input: "truediv_37" + input: "mul_169" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "norm_22/mul" + op: "Mul" + input: "cls/predictions/transform/dense/kernel/read" + input: "cls/predictions/transform/dense/kernel/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "norm_22/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_22/Sum" + op: "Sum" + input: "norm_22/mul" + input: "norm_22/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_22/Sqrt" + op: "Sqrt" + input: "norm_22/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_22/Squeeze" + op: "Squeeze" + input: "norm_22/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "norm_23/mul" + op: "Mul" + input: "add_92" + input: "add_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "norm_23/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_23/Sum" + op: "Sum" + input: "norm_23/mul" + input: "norm_23/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_23/Sqrt" + op: "Sqrt" + input: "norm_23/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_23/Squeeze" + op: "Squeeze" + input: "norm_23/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "Greater_22/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_22" + op: "Greater" + input: "norm_22/Squeeze" + input: "Greater_22/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Greater_23/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_23" + op: "Greater" + input: "norm_23/Squeeze" + input: "Greater_23/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "truediv_38" + op: "RealDiv" + input: "norm_22/Squeeze" + input: "norm_23/Squeeze" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_22/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_22" + op: "Select" + input: "Greater_23" + input: "truediv_38" + input: "Select_22/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_23/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_23" + op: "Select" + input: "Greater_22" + input: "Select_22" + input: "Select_23/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_170" + op: "Mul" + input: "Select_23" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_171" + op: "Mul" + input: "mul_170" + input: "add_92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "sub_27" + op: "Sub" + input: "cls/predictions/transform/dense/kernel/read" + input: "mul_171" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Assign_75" + op: "Assign" + input: "cls/predictions/transform/dense/kernel" + input: "sub_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_76" + op: "Assign" + input: "cls/predictions/transform/dense/kernel/adam_m" + input: "add_89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_77" + op: "Assign" + input: "cls/predictions/transform/dense/kernel/adam_v" + input: "add_90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/dense/bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 128 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/dense/bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/dense/bias/adam_m/Assign" + op: "Assign" + input: "cls/predictions/transform/dense/bias/adam_m" + input: "cls/predictions/transform/dense/bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/dense/bias/adam_m/read" + op: "Identity" + input: "cls/predictions/transform/dense/bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/dense/bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 128 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/dense/bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/dense/bias/adam_v/Assign" + op: "Assign" + input: "cls/predictions/transform/dense/bias/adam_v" + input: "cls/predictions/transform/dense/bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/dense/bias/adam_v/read" + op: "Identity" + input: "cls/predictions/transform/dense/bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_172/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_172" + op: "Mul" + input: "Mul_172/x" + input: "cls/predictions/transform/dense/bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_173/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_173" + op: "Mul" + input: "Mul_173/x" + input: "clip_by_global_norm/clip_by_global_norm/_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_93" + op: "AddV2" + input: "Mul_172" + input: "Mul_173" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_174/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_174" + op: "Mul" + input: "Mul_174/x" + input: "cls/predictions/transform/dense/bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Square_26" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_175/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_175" + op: "Mul" + input: "Mul_175/x" + input: "Square_26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_94" + op: "AddV2" + input: "Mul_174" + input: "Mul_175" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Sqrt_26" + op: "Sqrt" + input: "add_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_95/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_95" + op: "AddV2" + input: "Sqrt_26" + input: "add_95/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "truediv_39" + op: "RealDiv" + input: "add_93" + input: "add_95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "mul_176/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_176" + op: "Mul" + input: "mul_176/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_177" + op: "Mul" + input: "mul_176" + input: "truediv_39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "sub_28" + op: "Sub" + input: "cls/predictions/transform/dense/bias/read" + input: "mul_177" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Assign_78" + op: "Assign" + input: "cls/predictions/transform/dense/bias" + input: "sub_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_79" + op: "Assign" + input: "cls/predictions/transform/dense/bias/adam_m" + input: "add_93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_80" + op: "Assign" + input: "cls/predictions/transform/dense/bias/adam_v" + input: "add_94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 128 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/adam_m/Assign" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta/adam_m" + input: "cls/predictions/transform/LayerNorm/beta/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/adam_m/read" + op: "Identity" + input: "cls/predictions/transform/LayerNorm/beta/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 128 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/adam_v/Assign" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta/adam_v" + input: "cls/predictions/transform/LayerNorm/beta/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/beta/adam_v/read" + op: "Identity" + input: "cls/predictions/transform/LayerNorm/beta/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_178/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_178" + op: "Mul" + input: "Mul_178/x" + input: "cls/predictions/transform/LayerNorm/beta/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_179/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_179" + op: "Mul" + input: "Mul_179/x" + input: "clip_by_global_norm/clip_by_global_norm/_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_96" + op: "AddV2" + input: "Mul_178" + input: "Mul_179" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_180/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_180" + op: "Mul" + input: "Mul_180/x" + input: "cls/predictions/transform/LayerNorm/beta/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Square_27" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_181/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_181" + op: "Mul" + input: "Mul_181/x" + input: "Square_27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_97" + op: "AddV2" + input: "Mul_180" + input: "Mul_181" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Sqrt_27" + op: "Sqrt" + input: "add_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_98/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_98" + op: "AddV2" + input: "Sqrt_27" + input: "add_98/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "truediv_40" + op: "RealDiv" + input: "add_96" + input: "add_98" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "mul_182/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_182" + op: "Mul" + input: "mul_182/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_183" + op: "Mul" + input: "mul_182" + input: "truediv_40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "sub_29" + op: "Sub" + input: "cls/predictions/transform/LayerNorm/beta/read" + input: "mul_183" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Assign_81" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta" + input: "sub_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_82" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta/adam_m" + input: "add_96" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_83" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta/adam_v" + input: "add_97" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 128 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/adam_m/Assign" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/adam_m/read" + op: "Identity" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 128 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 128 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/adam_v/Assign" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/transform/LayerNorm/gamma/adam_v/read" + op: "Identity" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_184/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_184" + op: "Mul" + input: "Mul_184/x" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_185/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_185" + op: "Mul" + input: "Mul_185/x" + input: "clip_by_global_norm/clip_by_global_norm/_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_99" + op: "AddV2" + input: "Mul_184" + input: "Mul_185" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_186/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_186" + op: "Mul" + input: "Mul_186/x" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Square_28" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Mul_187/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_187" + op: "Mul" + input: "Mul_187/x" + input: "Square_28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_100" + op: "AddV2" + input: "Mul_186" + input: "Mul_187" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Sqrt_28" + op: "Sqrt" + input: "add_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "add_101/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_101" + op: "AddV2" + input: "Sqrt_28" + input: "add_101/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "truediv_41" + op: "RealDiv" + input: "add_99" + input: "add_101" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "mul_188/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_188" + op: "Mul" + input: "mul_188/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_189" + op: "Mul" + input: "mul_188" + input: "truediv_41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "sub_30" + op: "Sub" + input: "cls/predictions/transform/LayerNorm/gamma/read" + input: "mul_189" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } +} +node { + name: "Assign_84" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma" + input: "sub_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_85" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m" + input: "add_99" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_86" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v" + input: "add_100" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/output_bias/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 105686 + } + } + } +} +node { + name: "cls/predictions/output_bias/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/output_bias/adam_m/Initializer/zeros" + op: "Fill" + input: "cls/predictions/output_bias/adam_m/Initializer/zeros/shape_as_tensor" + input: "cls/predictions/output_bias/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "cls/predictions/output_bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 105686 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/output_bias/adam_m/Assign" + op: "Assign" + input: "cls/predictions/output_bias/adam_m" + input: "cls/predictions/output_bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/output_bias/adam_m/read" + op: "Identity" + input: "cls/predictions/output_bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "cls/predictions/output_bias/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 105686 + } + } + } +} +node { + name: "cls/predictions/output_bias/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/predictions/output_bias/adam_v/Initializer/zeros" + op: "Fill" + input: "cls/predictions/output_bias/adam_v/Initializer/zeros/shape_as_tensor" + input: "cls/predictions/output_bias/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "cls/predictions/output_bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 105686 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/predictions/output_bias/adam_v/Assign" + op: "Assign" + input: "cls/predictions/output_bias/adam_v" + input: "cls/predictions/output_bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/predictions/output_bias/adam_v/read" + op: "Identity" + input: "cls/predictions/output_bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "Mul_190/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_190" + op: "Mul" + input: "Mul_190/x" + input: "cls/predictions/output_bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "Mul_191/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_191" + op: "Mul" + input: "Mul_191/x" + input: "clip_by_global_norm/clip_by_global_norm/_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "add_102" + op: "AddV2" + input: "Mul_190" + input: "Mul_191" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "Mul_192/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_192" + op: "Mul" + input: "Mul_192/x" + input: "cls/predictions/output_bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "Square_29" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "Mul_193/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_193" + op: "Mul" + input: "Mul_193/x" + input: "Square_29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "add_103" + op: "AddV2" + input: "Mul_192" + input: "Mul_193" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "Sqrt_29" + op: "Sqrt" + input: "add_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "add_104/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_104" + op: "AddV2" + input: "Sqrt_29" + input: "add_104/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "truediv_42" + op: "RealDiv" + input: "add_102" + input: "add_104" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "mul_194/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_194" + op: "Mul" + input: "mul_194/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_195" + op: "Mul" + input: "mul_194" + input: "truediv_42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "sub_31" + op: "Sub" + input: "cls/predictions/output_bias/read" + input: "mul_195" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } +} +node { + name: "Assign_87" + op: "Assign" + input: "cls/predictions/output_bias" + input: "sub_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_88" + op: "Assign" + input: "cls/predictions/output_bias/adam_m" + input: "add_102" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_89" + op: "Assign" + input: "cls/predictions/output_bias/adam_v" + input: "add_103" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_m/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\002\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_m/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_m/Initializer/zeros" + op: "Fill" + input: "cls/seq_relationship/output_weights/adam_m/Initializer/zeros/shape_as_tensor" + input: "cls/seq_relationship/output_weights/adam_m/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_m/Assign" + op: "Assign" + input: "cls/seq_relationship/output_weights/adam_m" + input: "cls/seq_relationship/output_weights/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_m/read" + op: "Identity" + input: "cls/seq_relationship/output_weights/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_v/Initializer/zeros/shape_as_tensor" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\002\000\000\000\000\003\000\000" + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_v/Initializer/zeros/Const" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_v/Initializer/zeros" + op: "Fill" + input: "cls/seq_relationship/output_weights/adam_v/Initializer/zeros/shape_as_tensor" + input: "cls/seq_relationship/output_weights/adam_v/Initializer/zeros/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "index_type" + value { + type: DT_INT32 + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_v/Assign" + op: "Assign" + input: "cls/seq_relationship/output_weights/adam_v" + input: "cls/seq_relationship/output_weights/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/output_weights/adam_v/read" + op: "Identity" + input: "cls/seq_relationship/output_weights/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_196/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_196" + op: "Mul" + input: "Mul_196/x" + input: "cls/seq_relationship/output_weights/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_197/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_197" + op: "Mul" + input: "Mul_197/x" + input: "clip_by_global_norm/clip_by_global_norm/_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_105" + op: "AddV2" + input: "Mul_196" + input: "Mul_197" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_198/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_198" + op: "Mul" + input: "Mul_198/x" + input: "cls/seq_relationship/output_weights/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Square_30" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Mul_199/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_199" + op: "Mul" + input: "Mul_199/x" + input: "Square_30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_106" + op: "AddV2" + input: "Mul_198" + input: "Mul_199" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Sqrt_30" + op: "Sqrt" + input: "add_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_107/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_107" + op: "AddV2" + input: "Sqrt_30" + input: "add_107/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "truediv_43" + op: "RealDiv" + input: "add_105" + input: "add_107" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "mul_200/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.009999999776482582 + } + } + } +} +node { + name: "mul_200" + op: "Mul" + input: "mul_200/x" + input: "cls/seq_relationship/output_weights/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "add_108" + op: "AddV2" + input: "truediv_43" + input: "mul_200" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_24/mul" + op: "Mul" + input: "cls/seq_relationship/output_weights/read" + input: "cls/seq_relationship/output_weights/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_24/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_24/Sum" + op: "Sum" + input: "norm_24/mul" + input: "norm_24/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_24/Sqrt" + op: "Sqrt" + input: "norm_24/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_24/Squeeze" + op: "Squeeze" + input: "norm_24/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "norm_25/mul" + op: "Mul" + input: "add_108" + input: "add_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "norm_25/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\000\000\000\000\001\000\000\000" + } + } + } +} +node { + name: "norm_25/Sum" + op: "Sum" + input: "norm_25/mul" + input: "norm_25/Const" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "keep_dims" + value { + b: true + } + } +} +node { + name: "norm_25/Sqrt" + op: "Sqrt" + input: "norm_25/Sum" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "norm_25/Squeeze" + op: "Squeeze" + input: "norm_25/Sqrt" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + } + } + } +} +node { + name: "Greater_24/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_24" + op: "Greater" + input: "norm_24/Squeeze" + input: "Greater_24/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Greater_25/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0 + } + } + } +} +node { + name: "Greater_25" + op: "Greater" + input: "norm_25/Squeeze" + input: "Greater_25/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "truediv_44" + op: "RealDiv" + input: "norm_24/Squeeze" + input: "norm_25/Squeeze" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_24/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_24" + op: "Select" + input: "Greater_25" + input: "truediv_44" + input: "Select_24/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "Select_25/e" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "Select_25" + op: "Select" + input: "Greater_24" + input: "Select_24" + input: "Select_25/e" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_201" + op: "Mul" + input: "Select_25" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_202" + op: "Mul" + input: "mul_201" + input: "add_108" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "sub_32" + op: "Sub" + input: "cls/seq_relationship/output_weights/read" + input: "mul_202" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } +} +node { + name: "Assign_90" + op: "Assign" + input: "cls/seq_relationship/output_weights" + input: "sub_32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_91" + op: "Assign" + input: "cls/seq_relationship/output_weights/adam_m" + input: "add_105" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_92" + op: "Assign" + input: "cls/seq_relationship/output_weights/adam_v" + input: "add_106" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/output_bias/adam_m/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 2 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/seq_relationship/output_bias/adam_m" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/seq_relationship/output_bias/adam_m/Assign" + op: "Assign" + input: "cls/seq_relationship/output_bias/adam_m" + input: "cls/seq_relationship/output_bias/adam_m/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/output_bias/adam_m/read" + op: "Identity" + input: "cls/seq_relationship/output_bias/adam_m" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "cls/seq_relationship/output_bias/adam_v/Initializer/zeros" + op: "Const" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + size: 2 + } + } + float_val: 0.0 + } + } + } +} +node { + name: "cls/seq_relationship/output_bias/adam_v" + op: "VariableV2" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "container" + value { + s: "" + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "" + } + } +} +node { + name: "cls/seq_relationship/output_bias/adam_v/Assign" + op: "Assign" + input: "cls/seq_relationship/output_bias/adam_v" + input: "cls/seq_relationship/output_bias/adam_v/Initializer/zeros" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "cls/seq_relationship/output_bias/adam_v/read" + op: "Identity" + input: "cls/seq_relationship/output_bias/adam_v" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "Mul_203/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.8999999761581421 + } + } + } +} +node { + name: "Mul_203" + op: "Mul" + input: "Mul_203/x" + input: "cls/seq_relationship/output_bias/adam_m/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "Mul_204/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.10000000149011612 + } + } + } +} +node { + name: "Mul_204" + op: "Mul" + input: "Mul_204/x" + input: "clip_by_global_norm/clip_by_global_norm/_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "add_109" + op: "AddV2" + input: "Mul_203" + input: "Mul_204" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "Mul_205/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.9990000128746033 + } + } + } +} +node { + name: "Mul_205" + op: "Mul" + input: "Mul_205/x" + input: "cls/seq_relationship/output_bias/adam_v/read" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "Square_31" + op: "Square" + input: "clip_by_global_norm/clip_by_global_norm/_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "Mul_206/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 0.0010000000474974513 + } + } + } +} +node { + name: "Mul_206" + op: "Mul" + input: "Mul_206/x" + input: "Square_31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "add_110" + op: "AddV2" + input: "Mul_205" + input: "Mul_206" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "Sqrt_31" + op: "Sqrt" + input: "add_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "add_111/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 9.999999974752427e-07 + } + } + } +} +node { + name: "add_111" + op: "AddV2" + input: "Sqrt_31" + input: "add_111/y" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "truediv_45" + op: "RealDiv" + input: "add_109" + input: "add_111" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "mul_207/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 1.0 + } + } + } +} +node { + name: "mul_207" + op: "Mul" + input: "mul_207/x" + input: "add_2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "mul_208" + op: "Mul" + input: "mul_207" + input: "truediv_45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "sub_33" + op: "Sub" + input: "cls/seq_relationship/output_bias/read" + input: "mul_208" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } +} +node { + name: "Assign_93" + op: "Assign" + input: "cls/seq_relationship/output_bias" + input: "sub_33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_94" + op: "Assign" + input: "cls/seq_relationship/output_bias/adam_m" + input: "add_109" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "Assign_95" + op: "Assign" + input: "cls/seq_relationship/output_bias/adam_v" + input: "add_110" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: false + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "group_deps" + op: "NoOp" + input: "^Assign" + input: "^Assign_1" + input: "^Assign_10" + input: "^Assign_11" + input: "^Assign_12" + input: "^Assign_13" + input: "^Assign_14" + input: "^Assign_15" + input: "^Assign_16" + input: "^Assign_17" + input: "^Assign_18" + input: "^Assign_19" + input: "^Assign_2" + input: "^Assign_20" + input: "^Assign_21" + input: "^Assign_22" + input: "^Assign_23" + input: "^Assign_24" + input: "^Assign_25" + input: "^Assign_26" + input: "^Assign_27" + input: "^Assign_28" + input: "^Assign_29" + input: "^Assign_3" + input: "^Assign_30" + input: "^Assign_31" + input: "^Assign_32" + input: "^Assign_33" + input: "^Assign_34" + input: "^Assign_35" + input: "^Assign_36" + input: "^Assign_37" + input: "^Assign_38" + input: "^Assign_39" + input: "^Assign_4" + input: "^Assign_40" + input: "^Assign_41" + input: "^Assign_42" + input: "^Assign_43" + input: "^Assign_44" + input: "^Assign_45" + input: "^Assign_46" + input: "^Assign_47" + input: "^Assign_48" + input: "^Assign_49" + input: "^Assign_5" + input: "^Assign_50" + input: "^Assign_51" + input: "^Assign_52" + input: "^Assign_53" + input: "^Assign_54" + input: "^Assign_55" + input: "^Assign_56" + input: "^Assign_57" + input: "^Assign_58" + input: "^Assign_59" + input: "^Assign_6" + input: "^Assign_60" + input: "^Assign_61" + input: "^Assign_62" + input: "^Assign_63" + input: "^Assign_64" + input: "^Assign_65" + input: "^Assign_66" + input: "^Assign_67" + input: "^Assign_68" + input: "^Assign_69" + input: "^Assign_7" + input: "^Assign_70" + input: "^Assign_71" + input: "^Assign_72" + input: "^Assign_73" + input: "^Assign_74" + input: "^Assign_75" + input: "^Assign_76" + input: "^Assign_77" + input: "^Assign_78" + input: "^Assign_79" + input: "^Assign_8" + input: "^Assign_80" + input: "^Assign_81" + input: "^Assign_82" + input: "^Assign_83" + input: "^Assign_84" + input: "^Assign_85" + input: "^Assign_86" + input: "^Assign_87" + input: "^Assign_88" + input: "^Assign_89" + input: "^Assign_9" + input: "^Assign_90" + input: "^Assign_91" + input: "^Assign_92" + input: "^Assign_93" + input: "^Assign_94" + input: "^Assign_95" +} +node { + name: "ReadVariableOp" + op: "ReadVariableOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "add_112/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 1 + } + } + } +} +node { + name: "add_112" + op: "AddV2" + input: "ReadVariableOp" + input: "add_112/y" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "AssignVariableOp" + op: "AssignVariableOp" + input: "global_step" + input: "add_112" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "ReadVariableOp_1" + op: "ReadVariableOp" + input: "global_step" + input: "^AssignVariableOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "group_deps_1" + op: "NoOp" + input: "^AssignVariableOp" + input: "^group_deps" +} +node { + name: "loss/tags" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "loss" + } + } + } +} +node { + name: "loss" + op: "ScalarSummary" + input: "loss/tags" + input: "add_1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "init" + op: "NoOp" + input: "^bert/embeddings/LayerNorm/beta/Assign" + input: "^bert/embeddings/LayerNorm/beta/adam_m/Assign" + input: "^bert/embeddings/LayerNorm/beta/adam_v/Assign" + input: "^bert/embeddings/LayerNorm/gamma/Assign" + input: "^bert/embeddings/LayerNorm/gamma/adam_m/Assign" + input: "^bert/embeddings/LayerNorm/gamma/adam_v/Assign" + input: "^bert/embeddings/position_embeddings/Assign" + input: "^bert/embeddings/position_embeddings/adam_m/Assign" + input: "^bert/embeddings/position_embeddings/adam_v/Assign" + input: "^bert/embeddings/token_type_embeddings/Assign" + input: "^bert/embeddings/token_type_embeddings/adam_m/Assign" + input: "^bert/embeddings/token_type_embeddings/adam_v/Assign" + input: "^bert/embeddings/word_embeddings/Assign" + input: "^bert/embeddings/word_embeddings/adam_m/Assign" + input: "^bert/embeddings/word_embeddings/adam_v/Assign" + input: "^bert/encoder/embedding_hidden_mapping_in/bias/Assign" + input: "^bert/encoder/embedding_hidden_mapping_in/bias/adam_m/Assign" + input: "^bert/encoder/embedding_hidden_mapping_in/bias/adam_v/Assign" + input: "^bert/encoder/embedding_hidden_mapping_in/kernel/Assign" + input: "^bert/encoder/embedding_hidden_mapping_in/kernel/adam_m/Assign" + input: "^bert/encoder/embedding_hidden_mapping_in/kernel/adam_v/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m/Assign" + input: "^bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v/Assign" + input: "^bert/pooler/dense/bias/Assign" + input: "^bert/pooler/dense/bias/adam_m/Assign" + input: "^bert/pooler/dense/bias/adam_v/Assign" + input: "^bert/pooler/dense/kernel/Assign" + input: "^bert/pooler/dense/kernel/adam_m/Assign" + input: "^bert/pooler/dense/kernel/adam_v/Assign" + input: "^cls/predictions/output_bias/Assign" + input: "^cls/predictions/output_bias/adam_m/Assign" + input: "^cls/predictions/output_bias/adam_v/Assign" + input: "^cls/predictions/transform/LayerNorm/beta/Assign" + input: "^cls/predictions/transform/LayerNorm/beta/adam_m/Assign" + input: "^cls/predictions/transform/LayerNorm/beta/adam_v/Assign" + input: "^cls/predictions/transform/LayerNorm/gamma/Assign" + input: "^cls/predictions/transform/LayerNorm/gamma/adam_m/Assign" + input: "^cls/predictions/transform/LayerNorm/gamma/adam_v/Assign" + input: "^cls/predictions/transform/dense/bias/Assign" + input: "^cls/predictions/transform/dense/bias/adam_m/Assign" + input: "^cls/predictions/transform/dense/bias/adam_v/Assign" + input: "^cls/predictions/transform/dense/kernel/Assign" + input: "^cls/predictions/transform/dense/kernel/adam_m/Assign" + input: "^cls/predictions/transform/dense/kernel/adam_v/Assign" + input: "^cls/seq_relationship/output_bias/Assign" + input: "^cls/seq_relationship/output_bias/adam_m/Assign" + input: "^cls/seq_relationship/output_bias/adam_v/Assign" + input: "^cls/seq_relationship/output_weights/Assign" + input: "^cls/seq_relationship/output_weights/adam_m/Assign" + input: "^cls/seq_relationship/output_weights/adam_v/Assign" + input: "^global_step/Assign" +} +node { + name: "init_1" + op: "NoOp" +} +node { + name: "group_deps_2" + op: "NoOp" + input: "^init" + input: "^init_1" +} +node { + name: "report_uninitialized_variables/VarIsInitializedOp" + op: "VarIsInitializedOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_1" + op: "IsVariableInitialized" + input: "bert/embeddings/token_type_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_2" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_3" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_4" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_5" + op: "IsVariableInitialized" + input: "bert/encoder/embedding_hidden_mapping_in/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_6" + op: "IsVariableInitialized" + input: "bert/encoder/embedding_hidden_mapping_in/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_7" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_8" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_9" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_10" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_11" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_12" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_13" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_14" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_15" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_16" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_17" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_18" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_19" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_20" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_21" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_22" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_23" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_24" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_25" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_26" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_27" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_28" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_29" + op: "IsVariableInitialized" + input: "cls/predictions/output_bias" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_30" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_weights" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_31" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_bias" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_32" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_33" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_34" + op: "IsVariableInitialized" + input: "bert/embeddings/token_type_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_35" + op: "IsVariableInitialized" + input: "bert/embeddings/token_type_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_36" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_37" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_38" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_39" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_40" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_41" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_42" + op: "IsVariableInitialized" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_43" + op: "IsVariableInitialized" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_44" + op: "IsVariableInitialized" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_45" + op: "IsVariableInitialized" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_46" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_47" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_48" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_49" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_50" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_51" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_52" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_53" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_54" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_55" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_56" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_57" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_58" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_59" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_60" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_61" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_62" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_63" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_64" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_65" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_66" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_67" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_68" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_69" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_70" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_71" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_72" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_73" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_74" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_75" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_76" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_77" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_78" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_79" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_80" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_81" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_82" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_83" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_84" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_85" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_86" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_87" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_88" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_89" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_90" + op: "IsVariableInitialized" + input: "cls/predictions/output_bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_91" + op: "IsVariableInitialized" + input: "cls/predictions/output_bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_92" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_weights/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_93" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_weights/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_94" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/IsVariableInitialized_95" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables/stack" + op: "Pack" + input: "report_uninitialized_variables/VarIsInitializedOp" + input: "report_uninitialized_variables/IsVariableInitialized" + input: "report_uninitialized_variables/IsVariableInitialized_1" + input: "report_uninitialized_variables/IsVariableInitialized_2" + input: "report_uninitialized_variables/IsVariableInitialized_3" + input: "report_uninitialized_variables/IsVariableInitialized_4" + input: "report_uninitialized_variables/IsVariableInitialized_5" + input: "report_uninitialized_variables/IsVariableInitialized_6" + input: "report_uninitialized_variables/IsVariableInitialized_7" + input: "report_uninitialized_variables/IsVariableInitialized_8" + input: "report_uninitialized_variables/IsVariableInitialized_9" + input: "report_uninitialized_variables/IsVariableInitialized_10" + input: "report_uninitialized_variables/IsVariableInitialized_11" + input: "report_uninitialized_variables/IsVariableInitialized_12" + input: "report_uninitialized_variables/IsVariableInitialized_13" + input: "report_uninitialized_variables/IsVariableInitialized_14" + input: "report_uninitialized_variables/IsVariableInitialized_15" + input: "report_uninitialized_variables/IsVariableInitialized_16" + input: "report_uninitialized_variables/IsVariableInitialized_17" + input: "report_uninitialized_variables/IsVariableInitialized_18" + input: "report_uninitialized_variables/IsVariableInitialized_19" + input: "report_uninitialized_variables/IsVariableInitialized_20" + input: "report_uninitialized_variables/IsVariableInitialized_21" + input: "report_uninitialized_variables/IsVariableInitialized_22" + input: "report_uninitialized_variables/IsVariableInitialized_23" + input: "report_uninitialized_variables/IsVariableInitialized_24" + input: "report_uninitialized_variables/IsVariableInitialized_25" + input: "report_uninitialized_variables/IsVariableInitialized_26" + input: "report_uninitialized_variables/IsVariableInitialized_27" + input: "report_uninitialized_variables/IsVariableInitialized_28" + input: "report_uninitialized_variables/IsVariableInitialized_29" + input: "report_uninitialized_variables/IsVariableInitialized_30" + input: "report_uninitialized_variables/IsVariableInitialized_31" + input: "report_uninitialized_variables/IsVariableInitialized_32" + input: "report_uninitialized_variables/IsVariableInitialized_33" + input: "report_uninitialized_variables/IsVariableInitialized_34" + input: "report_uninitialized_variables/IsVariableInitialized_35" + input: "report_uninitialized_variables/IsVariableInitialized_36" + input: "report_uninitialized_variables/IsVariableInitialized_37" + input: "report_uninitialized_variables/IsVariableInitialized_38" + input: "report_uninitialized_variables/IsVariableInitialized_39" + input: "report_uninitialized_variables/IsVariableInitialized_40" + input: "report_uninitialized_variables/IsVariableInitialized_41" + input: "report_uninitialized_variables/IsVariableInitialized_42" + input: "report_uninitialized_variables/IsVariableInitialized_43" + input: "report_uninitialized_variables/IsVariableInitialized_44" + input: "report_uninitialized_variables/IsVariableInitialized_45" + input: "report_uninitialized_variables/IsVariableInitialized_46" + input: "report_uninitialized_variables/IsVariableInitialized_47" + input: "report_uninitialized_variables/IsVariableInitialized_48" + input: "report_uninitialized_variables/IsVariableInitialized_49" + input: "report_uninitialized_variables/IsVariableInitialized_50" + input: "report_uninitialized_variables/IsVariableInitialized_51" + input: "report_uninitialized_variables/IsVariableInitialized_52" + input: "report_uninitialized_variables/IsVariableInitialized_53" + input: "report_uninitialized_variables/IsVariableInitialized_54" + input: "report_uninitialized_variables/IsVariableInitialized_55" + input: "report_uninitialized_variables/IsVariableInitialized_56" + input: "report_uninitialized_variables/IsVariableInitialized_57" + input: "report_uninitialized_variables/IsVariableInitialized_58" + input: "report_uninitialized_variables/IsVariableInitialized_59" + input: "report_uninitialized_variables/IsVariableInitialized_60" + input: "report_uninitialized_variables/IsVariableInitialized_61" + input: "report_uninitialized_variables/IsVariableInitialized_62" + input: "report_uninitialized_variables/IsVariableInitialized_63" + input: "report_uninitialized_variables/IsVariableInitialized_64" + input: "report_uninitialized_variables/IsVariableInitialized_65" + input: "report_uninitialized_variables/IsVariableInitialized_66" + input: "report_uninitialized_variables/IsVariableInitialized_67" + input: "report_uninitialized_variables/IsVariableInitialized_68" + input: "report_uninitialized_variables/IsVariableInitialized_69" + input: "report_uninitialized_variables/IsVariableInitialized_70" + input: "report_uninitialized_variables/IsVariableInitialized_71" + input: "report_uninitialized_variables/IsVariableInitialized_72" + input: "report_uninitialized_variables/IsVariableInitialized_73" + input: "report_uninitialized_variables/IsVariableInitialized_74" + input: "report_uninitialized_variables/IsVariableInitialized_75" + input: "report_uninitialized_variables/IsVariableInitialized_76" + input: "report_uninitialized_variables/IsVariableInitialized_77" + input: "report_uninitialized_variables/IsVariableInitialized_78" + input: "report_uninitialized_variables/IsVariableInitialized_79" + input: "report_uninitialized_variables/IsVariableInitialized_80" + input: "report_uninitialized_variables/IsVariableInitialized_81" + input: "report_uninitialized_variables/IsVariableInitialized_82" + input: "report_uninitialized_variables/IsVariableInitialized_83" + input: "report_uninitialized_variables/IsVariableInitialized_84" + input: "report_uninitialized_variables/IsVariableInitialized_85" + input: "report_uninitialized_variables/IsVariableInitialized_86" + input: "report_uninitialized_variables/IsVariableInitialized_87" + input: "report_uninitialized_variables/IsVariableInitialized_88" + input: "report_uninitialized_variables/IsVariableInitialized_89" + input: "report_uninitialized_variables/IsVariableInitialized_90" + input: "report_uninitialized_variables/IsVariableInitialized_91" + input: "report_uninitialized_variables/IsVariableInitialized_92" + input: "report_uninitialized_variables/IsVariableInitialized_93" + input: "report_uninitialized_variables/IsVariableInitialized_94" + input: "report_uninitialized_variables/IsVariableInitialized_95" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 97 + } + } + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 97 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables/LogicalNot" + op: "LogicalNot" + input: "report_uninitialized_variables/stack" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 97 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables/Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 97 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 97 + } + } + string_val: "global_step" + string_val: "bert/embeddings/word_embeddings" + string_val: "bert/embeddings/token_type_embeddings" + string_val: "bert/embeddings/position_embeddings" + string_val: "bert/embeddings/LayerNorm/beta" + string_val: "bert/embeddings/LayerNorm/gamma" + string_val: "bert/encoder/embedding_hidden_mapping_in/kernel" + string_val: "bert/encoder/embedding_hidden_mapping_in/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + string_val: "bert/pooler/dense/kernel" + string_val: "bert/pooler/dense/bias" + string_val: "cls/predictions/transform/dense/kernel" + string_val: "cls/predictions/transform/dense/bias" + string_val: "cls/predictions/transform/LayerNorm/beta" + string_val: "cls/predictions/transform/LayerNorm/gamma" + string_val: "cls/predictions/output_bias" + string_val: "cls/seq_relationship/output_weights" + string_val: "cls/seq_relationship/output_bias" + string_val: "bert/embeddings/word_embeddings/adam_m" + string_val: "bert/embeddings/word_embeddings/adam_v" + string_val: "bert/embeddings/token_type_embeddings/adam_m" + string_val: "bert/embeddings/token_type_embeddings/adam_v" + string_val: "bert/embeddings/position_embeddings/adam_m" + string_val: "bert/embeddings/position_embeddings/adam_v" + string_val: "bert/embeddings/LayerNorm/beta/adam_m" + string_val: "bert/embeddings/LayerNorm/beta/adam_v" + string_val: "bert/embeddings/LayerNorm/gamma/adam_m" + string_val: "bert/embeddings/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + string_val: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + string_val: "bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + string_val: "bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + string_val: "bert/pooler/dense/kernel/adam_m" + string_val: "bert/pooler/dense/kernel/adam_v" + string_val: "bert/pooler/dense/bias/adam_m" + string_val: "bert/pooler/dense/bias/adam_v" + string_val: "cls/predictions/transform/dense/kernel/adam_m" + string_val: "cls/predictions/transform/dense/kernel/adam_v" + string_val: "cls/predictions/transform/dense/bias/adam_m" + string_val: "cls/predictions/transform/dense/bias/adam_v" + string_val: "cls/predictions/transform/LayerNorm/beta/adam_m" + string_val: "cls/predictions/transform/LayerNorm/beta/adam_v" + string_val: "cls/predictions/transform/LayerNorm/gamma/adam_m" + string_val: "cls/predictions/transform/LayerNorm/gamma/adam_v" + string_val: "cls/predictions/output_bias/adam_m" + string_val: "cls/predictions/output_bias/adam_v" + string_val: "cls/seq_relationship/output_weights/adam_m" + string_val: "cls/seq_relationship/output_weights/adam_v" + string_val: "cls/seq_relationship/output_bias/adam_m" + string_val: "cls/seq_relationship/output_bias/adam_v" + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Shape" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 97 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice" + op: "StridedSlice" + input: "report_uninitialized_variables/boolean_mask/Shape" + input: "report_uninitialized_variables/boolean_mask/strided_slice/stack" + input: "report_uninitialized_variables/boolean_mask/strided_slice/stack_1" + input: "report_uninitialized_variables/boolean_mask/strided_slice/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Prod/reduction_indices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Prod" + op: "Prod" + input: "report_uninitialized_variables/boolean_mask/strided_slice" + input: "report_uninitialized_variables/boolean_mask/Prod/reduction_indices" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Shape_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 97 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_1" + op: "StridedSlice" + input: "report_uninitialized_variables/boolean_mask/Shape_1" + input: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack" + input: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack_1" + input: "report_uninitialized_variables/boolean_mask/strided_slice_1/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Shape_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 97 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/strided_slice_2" + op: "StridedSlice" + input: "report_uninitialized_variables/boolean_mask/Shape_2" + input: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack" + input: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack_1" + input: "report_uninitialized_variables/boolean_mask/strided_slice_2/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 1 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/concat/values_1" + op: "Pack" + input: "report_uninitialized_variables/boolean_mask/Prod" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 1 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/concat/axis" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/concat" + op: "ConcatV2" + input: "report_uninitialized_variables/boolean_mask/strided_slice_1" + input: "report_uninitialized_variables/boolean_mask/concat/values_1" + input: "report_uninitialized_variables/boolean_mask/strided_slice_2" + input: "report_uninitialized_variables/boolean_mask/concat/axis" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Reshape" + op: "Reshape" + input: "report_uninitialized_variables/Const" + input: "report_uninitialized_variables/boolean_mask/concat" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 97 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Reshape_1/shape" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Reshape_1" + op: "Reshape" + input: "report_uninitialized_variables/LogicalNot" + input: "report_uninitialized_variables/boolean_mask/Reshape_1/shape" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 97 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Where" + op: "Where" + input: "report_uninitialized_variables/boolean_mask/Reshape_1" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/Squeeze" + op: "Squeeze" + input: "report_uninitialized_variables/boolean_mask/Where" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + i: 1 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/GatherV2/axis" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables/boolean_mask/GatherV2" + op: "GatherV2" + input: "report_uninitialized_variables/boolean_mask/Reshape" + input: "report_uninitialized_variables/boolean_mask/Squeeze" + input: "report_uninitialized_variables/boolean_mask/GatherV2/axis" + device: "/device:CPU:0" + attr { + key: "Taxis" + value { + type: DT_INT32 + } + } + attr { + key: "Tindices" + value { + type: DT_INT64 + } + } + attr { + key: "Tparams" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "batch_dims" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_resources/Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "concat" + op: "ConcatV2" + input: "report_uninitialized_variables/boolean_mask/GatherV2" + input: "report_uninitialized_resources/Const" + input: "concat/axis" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/VarIsInitializedOp" + op: "VarIsInitializedOp" + input: "global_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_1" + op: "IsVariableInitialized" + input: "bert/embeddings/token_type_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_2" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_3" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_4" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_5" + op: "IsVariableInitialized" + input: "bert/encoder/embedding_hidden_mapping_in/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_6" + op: "IsVariableInitialized" + input: "bert/encoder/embedding_hidden_mapping_in/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_7" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_8" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_9" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_10" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_11" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_12" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_13" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_14" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_15" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_16" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_17" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_18" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_19" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_20" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_21" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_22" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_23" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_24" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_25" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/kernel" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_26" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/bias" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_27" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/beta" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_28" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/gamma" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_29" + op: "IsVariableInitialized" + input: "cls/predictions/output_bias" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_30" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_weights" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_31" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_bias" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_32" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_33" + op: "IsVariableInitialized" + input: "bert/embeddings/word_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_34" + op: "IsVariableInitialized" + input: "bert/embeddings/token_type_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_35" + op: "IsVariableInitialized" + input: "bert/embeddings/token_type_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_36" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_37" + op: "IsVariableInitialized" + input: "bert/embeddings/position_embeddings/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_38" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_39" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_40" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_41" + op: "IsVariableInitialized" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_42" + op: "IsVariableInitialized" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_43" + op: "IsVariableInitialized" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_44" + op: "IsVariableInitialized" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_45" + op: "IsVariableInitialized" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_46" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_47" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_48" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_49" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_50" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_51" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_52" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_53" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_54" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_55" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_56" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_57" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_58" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_59" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_60" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_61" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_62" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_63" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_64" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_65" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_66" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_67" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_68" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_69" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_70" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_71" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_72" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_73" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_74" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_75" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_76" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_77" + op: "IsVariableInitialized" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_78" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_79" + op: "IsVariableInitialized" + input: "bert/pooler/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_80" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_81" + op: "IsVariableInitialized" + input: "bert/pooler/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_82" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/kernel/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_83" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/kernel/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_84" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_85" + op: "IsVariableInitialized" + input: "cls/predictions/transform/dense/bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_86" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/beta/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_87" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/beta/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_88" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_89" + op: "IsVariableInitialized" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_90" + op: "IsVariableInitialized" + input: "cls/predictions/output_bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_91" + op: "IsVariableInitialized" + input: "cls/predictions/output_bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_92" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_weights/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_93" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_weights/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_94" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_bias/adam_m" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/IsVariableInitialized_95" + op: "IsVariableInitialized" + input: "cls/seq_relationship/output_bias/adam_v" + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } +} +node { + name: "report_uninitialized_variables_1/stack" + op: "Pack" + input: "report_uninitialized_variables_1/VarIsInitializedOp" + input: "report_uninitialized_variables_1/IsVariableInitialized" + input: "report_uninitialized_variables_1/IsVariableInitialized_1" + input: "report_uninitialized_variables_1/IsVariableInitialized_2" + input: "report_uninitialized_variables_1/IsVariableInitialized_3" + input: "report_uninitialized_variables_1/IsVariableInitialized_4" + input: "report_uninitialized_variables_1/IsVariableInitialized_5" + input: "report_uninitialized_variables_1/IsVariableInitialized_6" + input: "report_uninitialized_variables_1/IsVariableInitialized_7" + input: "report_uninitialized_variables_1/IsVariableInitialized_8" + input: "report_uninitialized_variables_1/IsVariableInitialized_9" + input: "report_uninitialized_variables_1/IsVariableInitialized_10" + input: "report_uninitialized_variables_1/IsVariableInitialized_11" + input: "report_uninitialized_variables_1/IsVariableInitialized_12" + input: "report_uninitialized_variables_1/IsVariableInitialized_13" + input: "report_uninitialized_variables_1/IsVariableInitialized_14" + input: "report_uninitialized_variables_1/IsVariableInitialized_15" + input: "report_uninitialized_variables_1/IsVariableInitialized_16" + input: "report_uninitialized_variables_1/IsVariableInitialized_17" + input: "report_uninitialized_variables_1/IsVariableInitialized_18" + input: "report_uninitialized_variables_1/IsVariableInitialized_19" + input: "report_uninitialized_variables_1/IsVariableInitialized_20" + input: "report_uninitialized_variables_1/IsVariableInitialized_21" + input: "report_uninitialized_variables_1/IsVariableInitialized_22" + input: "report_uninitialized_variables_1/IsVariableInitialized_23" + input: "report_uninitialized_variables_1/IsVariableInitialized_24" + input: "report_uninitialized_variables_1/IsVariableInitialized_25" + input: "report_uninitialized_variables_1/IsVariableInitialized_26" + input: "report_uninitialized_variables_1/IsVariableInitialized_27" + input: "report_uninitialized_variables_1/IsVariableInitialized_28" + input: "report_uninitialized_variables_1/IsVariableInitialized_29" + input: "report_uninitialized_variables_1/IsVariableInitialized_30" + input: "report_uninitialized_variables_1/IsVariableInitialized_31" + input: "report_uninitialized_variables_1/IsVariableInitialized_32" + input: "report_uninitialized_variables_1/IsVariableInitialized_33" + input: "report_uninitialized_variables_1/IsVariableInitialized_34" + input: "report_uninitialized_variables_1/IsVariableInitialized_35" + input: "report_uninitialized_variables_1/IsVariableInitialized_36" + input: "report_uninitialized_variables_1/IsVariableInitialized_37" + input: "report_uninitialized_variables_1/IsVariableInitialized_38" + input: "report_uninitialized_variables_1/IsVariableInitialized_39" + input: "report_uninitialized_variables_1/IsVariableInitialized_40" + input: "report_uninitialized_variables_1/IsVariableInitialized_41" + input: "report_uninitialized_variables_1/IsVariableInitialized_42" + input: "report_uninitialized_variables_1/IsVariableInitialized_43" + input: "report_uninitialized_variables_1/IsVariableInitialized_44" + input: "report_uninitialized_variables_1/IsVariableInitialized_45" + input: "report_uninitialized_variables_1/IsVariableInitialized_46" + input: "report_uninitialized_variables_1/IsVariableInitialized_47" + input: "report_uninitialized_variables_1/IsVariableInitialized_48" + input: "report_uninitialized_variables_1/IsVariableInitialized_49" + input: "report_uninitialized_variables_1/IsVariableInitialized_50" + input: "report_uninitialized_variables_1/IsVariableInitialized_51" + input: "report_uninitialized_variables_1/IsVariableInitialized_52" + input: "report_uninitialized_variables_1/IsVariableInitialized_53" + input: "report_uninitialized_variables_1/IsVariableInitialized_54" + input: "report_uninitialized_variables_1/IsVariableInitialized_55" + input: "report_uninitialized_variables_1/IsVariableInitialized_56" + input: "report_uninitialized_variables_1/IsVariableInitialized_57" + input: "report_uninitialized_variables_1/IsVariableInitialized_58" + input: "report_uninitialized_variables_1/IsVariableInitialized_59" + input: "report_uninitialized_variables_1/IsVariableInitialized_60" + input: "report_uninitialized_variables_1/IsVariableInitialized_61" + input: "report_uninitialized_variables_1/IsVariableInitialized_62" + input: "report_uninitialized_variables_1/IsVariableInitialized_63" + input: "report_uninitialized_variables_1/IsVariableInitialized_64" + input: "report_uninitialized_variables_1/IsVariableInitialized_65" + input: "report_uninitialized_variables_1/IsVariableInitialized_66" + input: "report_uninitialized_variables_1/IsVariableInitialized_67" + input: "report_uninitialized_variables_1/IsVariableInitialized_68" + input: "report_uninitialized_variables_1/IsVariableInitialized_69" + input: "report_uninitialized_variables_1/IsVariableInitialized_70" + input: "report_uninitialized_variables_1/IsVariableInitialized_71" + input: "report_uninitialized_variables_1/IsVariableInitialized_72" + input: "report_uninitialized_variables_1/IsVariableInitialized_73" + input: "report_uninitialized_variables_1/IsVariableInitialized_74" + input: "report_uninitialized_variables_1/IsVariableInitialized_75" + input: "report_uninitialized_variables_1/IsVariableInitialized_76" + input: "report_uninitialized_variables_1/IsVariableInitialized_77" + input: "report_uninitialized_variables_1/IsVariableInitialized_78" + input: "report_uninitialized_variables_1/IsVariableInitialized_79" + input: "report_uninitialized_variables_1/IsVariableInitialized_80" + input: "report_uninitialized_variables_1/IsVariableInitialized_81" + input: "report_uninitialized_variables_1/IsVariableInitialized_82" + input: "report_uninitialized_variables_1/IsVariableInitialized_83" + input: "report_uninitialized_variables_1/IsVariableInitialized_84" + input: "report_uninitialized_variables_1/IsVariableInitialized_85" + input: "report_uninitialized_variables_1/IsVariableInitialized_86" + input: "report_uninitialized_variables_1/IsVariableInitialized_87" + input: "report_uninitialized_variables_1/IsVariableInitialized_88" + input: "report_uninitialized_variables_1/IsVariableInitialized_89" + input: "report_uninitialized_variables_1/IsVariableInitialized_90" + input: "report_uninitialized_variables_1/IsVariableInitialized_91" + input: "report_uninitialized_variables_1/IsVariableInitialized_92" + input: "report_uninitialized_variables_1/IsVariableInitialized_93" + input: "report_uninitialized_variables_1/IsVariableInitialized_94" + input: "report_uninitialized_variables_1/IsVariableInitialized_95" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 97 + } + } + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 97 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables_1/LogicalNot" + op: "LogicalNot" + input: "report_uninitialized_variables_1/stack" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 97 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 97 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 97 + } + } + string_val: "global_step" + string_val: "bert/embeddings/word_embeddings" + string_val: "bert/embeddings/token_type_embeddings" + string_val: "bert/embeddings/position_embeddings" + string_val: "bert/embeddings/LayerNorm/beta" + string_val: "bert/embeddings/LayerNorm/gamma" + string_val: "bert/encoder/embedding_hidden_mapping_in/kernel" + string_val: "bert/encoder/embedding_hidden_mapping_in/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + string_val: "bert/pooler/dense/kernel" + string_val: "bert/pooler/dense/bias" + string_val: "cls/predictions/transform/dense/kernel" + string_val: "cls/predictions/transform/dense/bias" + string_val: "cls/predictions/transform/LayerNorm/beta" + string_val: "cls/predictions/transform/LayerNorm/gamma" + string_val: "cls/predictions/output_bias" + string_val: "cls/seq_relationship/output_weights" + string_val: "cls/seq_relationship/output_bias" + string_val: "bert/embeddings/word_embeddings/adam_m" + string_val: "bert/embeddings/word_embeddings/adam_v" + string_val: "bert/embeddings/token_type_embeddings/adam_m" + string_val: "bert/embeddings/token_type_embeddings/adam_v" + string_val: "bert/embeddings/position_embeddings/adam_m" + string_val: "bert/embeddings/position_embeddings/adam_v" + string_val: "bert/embeddings/LayerNorm/beta/adam_m" + string_val: "bert/embeddings/LayerNorm/beta/adam_v" + string_val: "bert/embeddings/LayerNorm/gamma/adam_m" + string_val: "bert/embeddings/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + string_val: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + string_val: "bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + string_val: "bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + string_val: "bert/pooler/dense/kernel/adam_m" + string_val: "bert/pooler/dense/kernel/adam_v" + string_val: "bert/pooler/dense/bias/adam_m" + string_val: "bert/pooler/dense/bias/adam_v" + string_val: "cls/predictions/transform/dense/kernel/adam_m" + string_val: "cls/predictions/transform/dense/kernel/adam_v" + string_val: "cls/predictions/transform/dense/bias/adam_m" + string_val: "cls/predictions/transform/dense/bias/adam_v" + string_val: "cls/predictions/transform/LayerNorm/beta/adam_m" + string_val: "cls/predictions/transform/LayerNorm/beta/adam_v" + string_val: "cls/predictions/transform/LayerNorm/gamma/adam_m" + string_val: "cls/predictions/transform/LayerNorm/gamma/adam_v" + string_val: "cls/predictions/output_bias/adam_m" + string_val: "cls/predictions/output_bias/adam_v" + string_val: "cls/seq_relationship/output_weights/adam_m" + string_val: "cls/seq_relationship/output_weights/adam_v" + string_val: "cls/seq_relationship/output_bias/adam_m" + string_val: "cls/seq_relationship/output_bias/adam_v" + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Shape" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 97 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice" + op: "StridedSlice" + input: "report_uninitialized_variables_1/boolean_mask/Shape" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack_1" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Prod/reduction_indices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Prod" + op: "Prod" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice" + input: "report_uninitialized_variables_1/boolean_mask/Prod/reduction_indices" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "keep_dims" + value { + b: false + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Shape_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 97 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_1" + op: "StridedSlice" + input: "report_uninitialized_variables_1/boolean_mask/Shape_1" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack_1" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_1/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 1 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 0 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Shape_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 97 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/strided_slice_2" + op: "StridedSlice" + input: "report_uninitialized_variables_1/boolean_mask/Shape_2" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack_1" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_2/stack_2" + device: "/device:CPU:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "begin_mask" + value { + i: 0 + } + } + attr { + key: "ellipsis_mask" + value { + i: 0 + } + } + attr { + key: "end_mask" + value { + i: 1 + } + } + attr { + key: "new_axis_mask" + value { + i: 0 + } + } + attr { + key: "shrink_axis_mask" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/concat/values_1" + op: "Pack" + input: "report_uninitialized_variables_1/boolean_mask/Prod" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 1 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/concat/axis" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/concat" + op: "ConcatV2" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_1" + input: "report_uninitialized_variables_1/boolean_mask/concat/values_1" + input: "report_uninitialized_variables_1/boolean_mask/strided_slice_2" + input: "report_uninitialized_variables_1/boolean_mask/concat/axis" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Reshape" + op: "Reshape" + input: "report_uninitialized_variables_1/Const" + input: "report_uninitialized_variables_1/boolean_mask/concat" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 97 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Reshape_1/shape" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Reshape_1" + op: "Reshape" + input: "report_uninitialized_variables_1/LogicalNot" + input: "report_uninitialized_variables_1/boolean_mask/Reshape_1/shape" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 97 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Where" + op: "Where" + input: "report_uninitialized_variables_1/boolean_mask/Reshape_1" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_BOOL + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/Squeeze" + op: "Squeeze" + input: "report_uninitialized_variables_1/boolean_mask/Where" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "squeeze_dims" + value { + list { + i: 1 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/GatherV2/axis" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "report_uninitialized_variables_1/boolean_mask/GatherV2" + op: "GatherV2" + input: "report_uninitialized_variables_1/boolean_mask/Reshape" + input: "report_uninitialized_variables_1/boolean_mask/Squeeze" + input: "report_uninitialized_variables_1/boolean_mask/GatherV2/axis" + device: "/device:CPU:0" + attr { + key: "Taxis" + value { + type: DT_INT32 + } + } + attr { + key: "Tindices" + value { + type: DT_INT64 + } + } + attr { + key: "Tparams" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } + attr { + key: "batch_dims" + value { + i: 0 + } + } +} +node { + name: "report_uninitialized_resources_1/Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + } + } + } + } + } +} +node { + name: "concat_1/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "concat_1" + op: "ConcatV2" + input: "report_uninitialized_variables_1/boolean_mask/GatherV2" + input: "report_uninitialized_resources_1/Const" + input: "concat_1/axis" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tidx" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + } + } + } + } +} +node { + name: "init_2" + op: "NoOp" +} +node { + name: "init_all_tables" + op: "NoOp" +} +node { + name: "init_3" + op: "NoOp" +} +node { + name: "group_deps_3" + op: "NoOp" + input: "^init_2" + input: "^init_3" + input: "^init_all_tables" +} +node { + name: "Merge/MergeSummary" + op: "MergeSummary" + input: "loss" + attr { + key: "N" + value { + i: 1 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "save/filename/input" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "model" + } + } + } +} +node { + name: "save/filename" + op: "PlaceholderWithDefault" + input: "save/filename/input" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "shape" + value { + shape { + } + } + } +} +node { + name: "save/Const" + op: "PlaceholderWithDefault" + input: "save/filename" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "shape" + value { + shape { + } + } + } +} +node { + name: "save/StringJoin/inputs_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "_temp_6305ee9d46c046fb90cbd64454bcd8e5/part" + } + } + } +} +node { + name: "save/StringJoin" + op: "StringJoin" + input: "save/Const" + input: "save/StringJoin/inputs_1" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "separator" + value { + s: "" + } + } +} +node { + name: "save/num_shards" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } +} +node { + name: "save/ShardedFilename/shard" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } +} +node { + name: "save/ShardedFilename" + op: "ShardedFilename" + input: "save/StringJoin" + input: "save/ShardedFilename/shard" + input: "save/num_shards" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "save/SaveV2/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 97 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 97 + } + } + string_val: "bert/embeddings/LayerNorm/beta" + string_val: "bert/embeddings/LayerNorm/beta/adam_m" + string_val: "bert/embeddings/LayerNorm/beta/adam_v" + string_val: "bert/embeddings/LayerNorm/gamma" + string_val: "bert/embeddings/LayerNorm/gamma/adam_m" + string_val: "bert/embeddings/LayerNorm/gamma/adam_v" + string_val: "bert/embeddings/position_embeddings" + string_val: "bert/embeddings/position_embeddings/adam_m" + string_val: "bert/embeddings/position_embeddings/adam_v" + string_val: "bert/embeddings/token_type_embeddings" + string_val: "bert/embeddings/token_type_embeddings/adam_m" + string_val: "bert/embeddings/token_type_embeddings/adam_v" + string_val: "bert/embeddings/word_embeddings" + string_val: "bert/embeddings/word_embeddings/adam_m" + string_val: "bert/embeddings/word_embeddings/adam_v" + string_val: "bert/encoder/embedding_hidden_mapping_in/bias" + string_val: "bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + string_val: "bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + string_val: "bert/encoder/embedding_hidden_mapping_in/kernel" + string_val: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + string_val: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + string_val: "bert/pooler/dense/bias" + string_val: "bert/pooler/dense/bias/adam_m" + string_val: "bert/pooler/dense/bias/adam_v" + string_val: "bert/pooler/dense/kernel" + string_val: "bert/pooler/dense/kernel/adam_m" + string_val: "bert/pooler/dense/kernel/adam_v" + string_val: "cls/predictions/output_bias" + string_val: "cls/predictions/output_bias/adam_m" + string_val: "cls/predictions/output_bias/adam_v" + string_val: "cls/predictions/transform/LayerNorm/beta" + string_val: "cls/predictions/transform/LayerNorm/beta/adam_m" + string_val: "cls/predictions/transform/LayerNorm/beta/adam_v" + string_val: "cls/predictions/transform/LayerNorm/gamma" + string_val: "cls/predictions/transform/LayerNorm/gamma/adam_m" + string_val: "cls/predictions/transform/LayerNorm/gamma/adam_v" + string_val: "cls/predictions/transform/dense/bias" + string_val: "cls/predictions/transform/dense/bias/adam_m" + string_val: "cls/predictions/transform/dense/bias/adam_v" + string_val: "cls/predictions/transform/dense/kernel" + string_val: "cls/predictions/transform/dense/kernel/adam_m" + string_val: "cls/predictions/transform/dense/kernel/adam_v" + string_val: "cls/seq_relationship/output_bias" + string_val: "cls/seq_relationship/output_bias/adam_m" + string_val: "cls/seq_relationship/output_bias/adam_v" + string_val: "cls/seq_relationship/output_weights" + string_val: "cls/seq_relationship/output_weights/adam_m" + string_val: "cls/seq_relationship/output_weights/adam_v" + string_val: "global_step" + } + } + } +} +node { + name: "save/SaveV2/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 97 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 97 + } + } + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + } + } + } +} +node { + name: "save/SaveV2" + op: "SaveV2" + input: "save/ShardedFilename" + input: "save/SaveV2/tensor_names" + input: "save/SaveV2/shape_and_slices" + input: "bert/embeddings/LayerNorm/beta" + input: "bert/embeddings/LayerNorm/beta/adam_m" + input: "bert/embeddings/LayerNorm/beta/adam_v" + input: "bert/embeddings/LayerNorm/gamma" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + input: "bert/embeddings/position_embeddings" + input: "bert/embeddings/position_embeddings/adam_m" + input: "bert/embeddings/position_embeddings/adam_v" + input: "bert/embeddings/token_type_embeddings" + input: "bert/embeddings/token_type_embeddings/adam_m" + input: "bert/embeddings/token_type_embeddings/adam_v" + input: "bert/embeddings/word_embeddings" + input: "bert/embeddings/word_embeddings/adam_m" + input: "bert/embeddings/word_embeddings/adam_v" + input: "bert/encoder/embedding_hidden_mapping_in/bias" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + input: "bert/encoder/embedding_hidden_mapping_in/kernel" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + input: "bert/pooler/dense/bias" + input: "bert/pooler/dense/bias/adam_m" + input: "bert/pooler/dense/bias/adam_v" + input: "bert/pooler/dense/kernel" + input: "bert/pooler/dense/kernel/adam_m" + input: "bert/pooler/dense/kernel/adam_v" + input: "cls/predictions/output_bias" + input: "cls/predictions/output_bias/adam_m" + input: "cls/predictions/output_bias/adam_v" + input: "cls/predictions/transform/LayerNorm/beta" + input: "cls/predictions/transform/LayerNorm/beta/adam_m" + input: "cls/predictions/transform/LayerNorm/beta/adam_v" + input: "cls/predictions/transform/LayerNorm/gamma" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v" + input: "cls/predictions/transform/dense/bias" + input: "cls/predictions/transform/dense/bias/adam_m" + input: "cls/predictions/transform/dense/bias/adam_v" + input: "cls/predictions/transform/dense/kernel" + input: "cls/predictions/transform/dense/kernel/adam_m" + input: "cls/predictions/transform/dense/kernel/adam_v" + input: "cls/seq_relationship/output_bias" + input: "cls/seq_relationship/output_bias/adam_m" + input: "cls/seq_relationship/output_bias/adam_v" + input: "cls/seq_relationship/output_weights" + input: "cls/seq_relationship/output_weights/adam_m" + input: "cls/seq_relationship/output_weights/adam_v" + input: "global_step/Read/ReadVariableOp" + device: "/device:CPU:0" + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + } + } + } +} +node { + name: "save/control_dependency" + op: "Identity" + input: "save/ShardedFilename" + input: "^save/SaveV2" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_class" + value { + list { + s: "loc:@save/ShardedFilename" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "save/MergeV2Checkpoints/checkpoint_prefixes" + op: "Pack" + input: "save/ShardedFilename" + input: "^save/control_dependency" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 1 + } + } + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "axis" + value { + i: 0 + } + } +} +node { + name: "save/MergeV2Checkpoints" + op: "MergeV2Checkpoints" + input: "save/MergeV2Checkpoints/checkpoint_prefixes" + input: "save/Const" + device: "/device:CPU:0" + attr { + key: "delete_old_dirs" + value { + b: true + } + } +} +node { + name: "save/Identity" + op: "Identity" + input: "save/Const" + input: "^save/MergeV2Checkpoints" + input: "^save/control_dependency" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } +} +node { + name: "save/RestoreV2/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 97 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 97 + } + } + string_val: "bert/embeddings/LayerNorm/beta" + string_val: "bert/embeddings/LayerNorm/beta/adam_m" + string_val: "bert/embeddings/LayerNorm/beta/adam_v" + string_val: "bert/embeddings/LayerNorm/gamma" + string_val: "bert/embeddings/LayerNorm/gamma/adam_m" + string_val: "bert/embeddings/LayerNorm/gamma/adam_v" + string_val: "bert/embeddings/position_embeddings" + string_val: "bert/embeddings/position_embeddings/adam_m" + string_val: "bert/embeddings/position_embeddings/adam_v" + string_val: "bert/embeddings/token_type_embeddings" + string_val: "bert/embeddings/token_type_embeddings/adam_m" + string_val: "bert/embeddings/token_type_embeddings/adam_v" + string_val: "bert/embeddings/word_embeddings" + string_val: "bert/embeddings/word_embeddings/adam_m" + string_val: "bert/embeddings/word_embeddings/adam_v" + string_val: "bert/encoder/embedding_hidden_mapping_in/bias" + string_val: "bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + string_val: "bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + string_val: "bert/encoder/embedding_hidden_mapping_in/kernel" + string_val: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + string_val: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + string_val: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + string_val: "bert/pooler/dense/bias" + string_val: "bert/pooler/dense/bias/adam_m" + string_val: "bert/pooler/dense/bias/adam_v" + string_val: "bert/pooler/dense/kernel" + string_val: "bert/pooler/dense/kernel/adam_m" + string_val: "bert/pooler/dense/kernel/adam_v" + string_val: "cls/predictions/output_bias" + string_val: "cls/predictions/output_bias/adam_m" + string_val: "cls/predictions/output_bias/adam_v" + string_val: "cls/predictions/transform/LayerNorm/beta" + string_val: "cls/predictions/transform/LayerNorm/beta/adam_m" + string_val: "cls/predictions/transform/LayerNorm/beta/adam_v" + string_val: "cls/predictions/transform/LayerNorm/gamma" + string_val: "cls/predictions/transform/LayerNorm/gamma/adam_m" + string_val: "cls/predictions/transform/LayerNorm/gamma/adam_v" + string_val: "cls/predictions/transform/dense/bias" + string_val: "cls/predictions/transform/dense/bias/adam_m" + string_val: "cls/predictions/transform/dense/bias/adam_v" + string_val: "cls/predictions/transform/dense/kernel" + string_val: "cls/predictions/transform/dense/kernel/adam_m" + string_val: "cls/predictions/transform/dense/kernel/adam_v" + string_val: "cls/seq_relationship/output_bias" + string_val: "cls/seq_relationship/output_bias/adam_m" + string_val: "cls/seq_relationship/output_bias/adam_v" + string_val: "cls/seq_relationship/output_weights" + string_val: "cls/seq_relationship/output_weights/adam_m" + string_val: "cls/seq_relationship/output_weights/adam_v" + string_val: "global_step" + } + } + } +} +node { + name: "save/RestoreV2/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 97 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 97 + } + } + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + } + } + } +} +node { + name: "save/RestoreV2" + op: "RestoreV2" + input: "save/Const" + input: "save/RestoreV2/tensor_names" + input: "save/RestoreV2/shape_and_slices" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + } + } + } +} +node { + name: "save/Assign" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta" + input: "save/RestoreV2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_1" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_m" + input: "save/RestoreV2:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_2" + op: "Assign" + input: "bert/embeddings/LayerNorm/beta/adam_v" + input: "save/RestoreV2:2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_3" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma" + input: "save/RestoreV2:3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_4" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_5" + op: "Assign" + input: "bert/embeddings/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_6" + op: "Assign" + input: "bert/embeddings/position_embeddings" + input: "save/RestoreV2:6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_7" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_m" + input: "save/RestoreV2:7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_8" + op: "Assign" + input: "bert/embeddings/position_embeddings/adam_v" + input: "save/RestoreV2:8" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/position_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 512 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_9" + op: "Assign" + input: "bert/embeddings/token_type_embeddings" + input: "save/RestoreV2:9" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_10" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_m" + input: "save/RestoreV2:10" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_11" + op: "Assign" + input: "bert/embeddings/token_type_embeddings/adam_v" + input: "save/RestoreV2:11" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/token_type_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_12" + op: "Assign" + input: "bert/embeddings/word_embeddings" + input: "save/RestoreV2:12" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_13" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_m" + input: "save/RestoreV2:13" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_14" + op: "Assign" + input: "bert/embeddings/word_embeddings/adam_v" + input: "save/RestoreV2:14" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/embeddings/word_embeddings/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_15" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/bias" + input: "save/RestoreV2:15" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_16" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + input: "save/RestoreV2:16" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_17" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + input: "save/RestoreV2:17" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_18" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/kernel" + input: "save/RestoreV2:18" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_19" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + input: "save/RestoreV2:19" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_20" + op: "Assign" + input: "bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + input: "save/RestoreV2:20" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/embedding_hidden_mapping_in/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_21" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + input: "save/RestoreV2:21" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_22" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + input: "save/RestoreV2:22" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_23" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + input: "save/RestoreV2:23" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_24" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + input: "save/RestoreV2:24" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_25" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:25" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_26" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:26" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_27" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + input: "save/RestoreV2:27" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_28" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + input: "save/RestoreV2:28" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_29" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + input: "save/RestoreV2:29" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_30" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + input: "save/RestoreV2:30" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_31" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + input: "save/RestoreV2:31" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_32" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + input: "save/RestoreV2:32" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_33" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + input: "save/RestoreV2:33" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_34" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + input: "save/RestoreV2:34" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_35" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + input: "save/RestoreV2:35" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_36" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + input: "save/RestoreV2:36" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_37" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + input: "save/RestoreV2:37" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_38" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + input: "save/RestoreV2:38" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_39" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + input: "save/RestoreV2:39" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_40" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + input: "save/RestoreV2:40" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_41" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + input: "save/RestoreV2:41" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_42" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + input: "save/RestoreV2:42" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_43" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + input: "save/RestoreV2:43" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_44" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + input: "save/RestoreV2:44" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/key/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_45" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + input: "save/RestoreV2:45" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_46" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + input: "save/RestoreV2:46" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_47" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + input: "save/RestoreV2:47" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_48" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + input: "save/RestoreV2:48" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_49" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + input: "save/RestoreV2:49" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_50" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + input: "save/RestoreV2:50" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/query/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_51" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + input: "save/RestoreV2:51" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_52" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + input: "save/RestoreV2:52" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_53" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + input: "save/RestoreV2:53" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_54" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + input: "save/RestoreV2:54" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_55" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + input: "save/RestoreV2:55" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_56" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + input: "save/RestoreV2:56" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/attention_1/self/value/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_57" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + input: "save/RestoreV2:57" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_58" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + input: "save/RestoreV2:58" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_59" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + input: "save/RestoreV2:59" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_60" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + input: "save/RestoreV2:60" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_61" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + input: "save/RestoreV2:61" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_62" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + input: "save/RestoreV2:62" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 3072 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_63" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + input: "save/RestoreV2:63" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_64" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + input: "save/RestoreV2:64" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_65" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + input: "save/RestoreV2:65" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_66" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + input: "save/RestoreV2:66" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_67" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + input: "save/RestoreV2:67" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_68" + op: "Assign" + input: "bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + input: "save/RestoreV2:68" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/encoder/transformer/group_0/inner_group_0/ffn_1/intermediate/output/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 3072 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_69" + op: "Assign" + input: "bert/pooler/dense/bias" + input: "save/RestoreV2:69" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_70" + op: "Assign" + input: "bert/pooler/dense/bias/adam_m" + input: "save/RestoreV2:70" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_71" + op: "Assign" + input: "bert/pooler/dense/bias/adam_v" + input: "save/RestoreV2:71" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_72" + op: "Assign" + input: "bert/pooler/dense/kernel" + input: "save/RestoreV2:72" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_73" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_m" + input: "save/RestoreV2:73" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_74" + op: "Assign" + input: "bert/pooler/dense/kernel/adam_v" + input: "save/RestoreV2:74" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@bert/pooler/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_75" + op: "Assign" + input: "cls/predictions/output_bias" + input: "save/RestoreV2:75" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_76" + op: "Assign" + input: "cls/predictions/output_bias/adam_m" + input: "save/RestoreV2:76" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_77" + op: "Assign" + input: "cls/predictions/output_bias/adam_v" + input: "save/RestoreV2:77" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 105686 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_78" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta" + input: "save/RestoreV2:78" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_79" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta/adam_m" + input: "save/RestoreV2:79" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_80" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/beta/adam_v" + input: "save/RestoreV2:80" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/beta/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_81" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma" + input: "save/RestoreV2:81" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_82" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma/adam_m" + input: "save/RestoreV2:82" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_83" + op: "Assign" + input: "cls/predictions/transform/LayerNorm/gamma/adam_v" + input: "save/RestoreV2:83" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/LayerNorm/gamma/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_84" + op: "Assign" + input: "cls/predictions/transform/dense/bias" + input: "save/RestoreV2:84" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_85" + op: "Assign" + input: "cls/predictions/transform/dense/bias/adam_m" + input: "save/RestoreV2:85" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_86" + op: "Assign" + input: "cls/predictions/transform/dense/bias/adam_v" + input: "save/RestoreV2:86" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_87" + op: "Assign" + input: "cls/predictions/transform/dense/kernel" + input: "save/RestoreV2:87" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_88" + op: "Assign" + input: "cls/predictions/transform/dense/kernel/adam_m" + input: "save/RestoreV2:88" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_89" + op: "Assign" + input: "cls/predictions/transform/dense/kernel/adam_v" + input: "save/RestoreV2:89" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/predictions/transform/dense/kernel/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 768 + } + dim { + size: 128 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_90" + op: "Assign" + input: "cls/seq_relationship/output_bias" + input: "save/RestoreV2:90" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_91" + op: "Assign" + input: "cls/seq_relationship/output_bias/adam_m" + input: "save/RestoreV2:91" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_92" + op: "Assign" + input: "cls/seq_relationship/output_bias/adam_v" + input: "save/RestoreV2:92" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_bias/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_93" + op: "Assign" + input: "cls/seq_relationship/output_weights" + input: "save/RestoreV2:93" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_94" + op: "Assign" + input: "cls/seq_relationship/output_weights/adam_m" + input: "save/RestoreV2:94" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_m" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Assign_95" + op: "Assign" + input: "cls/seq_relationship/output_weights/adam_v" + input: "save/RestoreV2:95" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_class" + value { + list { + s: "loc:@cls/seq_relationship/output_weights/adam_v" + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + dim { + size: 768 + } + } + } + } + } + attr { + key: "use_locking" + value { + b: true + } + } + attr { + key: "validate_shape" + value { + b: true + } + } +} +node { + name: "save/Identity_1" + op: "Identity" + input: "save/RestoreV2:96" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } +} +node { + name: "save/AssignVariableOp" + op: "AssignVariableOp" + input: "global_step" + input: "save/Identity_1" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } +} +node { + name: "save/restore_shard" + op: "NoOp" + input: "^save/Assign" + input: "^save/AssignVariableOp" + input: "^save/Assign_1" + input: "^save/Assign_10" + input: "^save/Assign_11" + input: "^save/Assign_12" + input: "^save/Assign_13" + input: "^save/Assign_14" + input: "^save/Assign_15" + input: "^save/Assign_16" + input: "^save/Assign_17" + input: "^save/Assign_18" + input: "^save/Assign_19" + input: "^save/Assign_2" + input: "^save/Assign_20" + input: "^save/Assign_21" + input: "^save/Assign_22" + input: "^save/Assign_23" + input: "^save/Assign_24" + input: "^save/Assign_25" + input: "^save/Assign_26" + input: "^save/Assign_27" + input: "^save/Assign_28" + input: "^save/Assign_29" + input: "^save/Assign_3" + input: "^save/Assign_30" + input: "^save/Assign_31" + input: "^save/Assign_32" + input: "^save/Assign_33" + input: "^save/Assign_34" + input: "^save/Assign_35" + input: "^save/Assign_36" + input: "^save/Assign_37" + input: "^save/Assign_38" + input: "^save/Assign_39" + input: "^save/Assign_4" + input: "^save/Assign_40" + input: "^save/Assign_41" + input: "^save/Assign_42" + input: "^save/Assign_43" + input: "^save/Assign_44" + input: "^save/Assign_45" + input: "^save/Assign_46" + input: "^save/Assign_47" + input: "^save/Assign_48" + input: "^save/Assign_49" + input: "^save/Assign_5" + input: "^save/Assign_50" + input: "^save/Assign_51" + input: "^save/Assign_52" + input: "^save/Assign_53" + input: "^save/Assign_54" + input: "^save/Assign_55" + input: "^save/Assign_56" + input: "^save/Assign_57" + input: "^save/Assign_58" + input: "^save/Assign_59" + input: "^save/Assign_6" + input: "^save/Assign_60" + input: "^save/Assign_61" + input: "^save/Assign_62" + input: "^save/Assign_63" + input: "^save/Assign_64" + input: "^save/Assign_65" + input: "^save/Assign_66" + input: "^save/Assign_67" + input: "^save/Assign_68" + input: "^save/Assign_69" + input: "^save/Assign_7" + input: "^save/Assign_70" + input: "^save/Assign_71" + input: "^save/Assign_72" + input: "^save/Assign_73" + input: "^save/Assign_74" + input: "^save/Assign_75" + input: "^save/Assign_76" + input: "^save/Assign_77" + input: "^save/Assign_78" + input: "^save/Assign_79" + input: "^save/Assign_8" + input: "^save/Assign_80" + input: "^save/Assign_81" + input: "^save/Assign_82" + input: "^save/Assign_83" + input: "^save/Assign_84" + input: "^save/Assign_85" + input: "^save/Assign_86" + input: "^save/Assign_87" + input: "^save/Assign_88" + input: "^save/Assign_89" + input: "^save/Assign_9" + input: "^save/Assign_90" + input: "^save/Assign_91" + input: "^save/Assign_92" + input: "^save/Assign_93" + input: "^save/Assign_94" + input: "^save/Assign_95" +} +node { + name: "save/restore_all" + op: "NoOp" + input: "^save/restore_shard" +} +library { + function { + signature { + name: "__inference_Dataset_flat_map_read_one_file_40" + input_arg { + name: "args_0" + type: DT_STRING + } + output_arg { + name: "identity" + type: DT_VARIANT + } + is_stateful: true + control_output: "TFRecordDataset" + } + node_def { + name: "compression_type" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "" + } + } + } + } + node_def { + name: "buffer_size" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 262144 + } + } + } + } + node_def { + name: "TFRecordDataset" + op: "TFRecordDataset" + input: "args_0" + input: "compression_type:output:0" + input: "buffer_size:output:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + node_def { + name: "Identity" + op: "Identity" + input: "TFRecordDataset:handle:0" + input: "^TFRecordDataset" + attr { + key: "T" + value { + type: DT_VARIANT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + } + } + } + } + control_ret { + key: "TFRecordDataset" + value: "TFRecordDataset" + } + arg_attr { + value { + attr { + key: "_user_specified_name" + value { + s: "args_0" + } + } + } + } + } + function { + signature { + name: "__inference_tf_data_experimental_parallel_interleave__43" + input_arg { + name: "args_0" + type: DT_STRING + } + output_arg { + name: "identity" + type: DT_VARIANT + } + is_stateful: true + control_output: "TensorSliceDataset" + } + node_def { + name: "flat_filenames/shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: -1 + } + } + } + } + node_def { + name: "flat_filenames" + op: "Reshape" + input: "args_0" + input: "flat_filenames/shape:output:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "Tshape" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + } + node_def { + name: "TensorSliceDataset" + op: "TensorSliceDataset" + input: "flat_filenames:output:0" + attr { + key: "Toutput_types" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + } + node_def { + name: "FlatMapDataset" + op: "FlatMapDataset" + input: "TensorSliceDataset:handle:0" + attr { + key: "Targuments" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "f" + value { + func { + name: "__inference_Dataset_flat_map_read_one_file_40" + } + } + } + attr { + key: "output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "output_types" + value { + list { + type: DT_STRING + } + } + } + } + node_def { + name: "Identity" + op: "Identity" + input: "FlatMapDataset:handle:0" + input: "^TensorSliceDataset" + attr { + key: "T" + value { + type: DT_VARIANT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + } + } + } + } + control_ret { + key: "TensorSliceDataset" + value: "TensorSliceDataset" + } + arg_attr { + value { + attr { + key: "_user_specified_name" + value { + s: "args_0" + } + } + } + } + } + function { + signature { + name: "tf_data_experimental_map_and_batch__54" + input_arg { + name: "arg0" + type: DT_STRING + } + output_arg { + name: "toint32" + type: DT_INT32 + } + output_arg { + name: "toint32_1" + type: DT_INT32 + } + output_arg { + name: "toint32_2" + type: DT_INT32 + } + output_arg { + name: "toint32_3" + type: DT_INT32 + } + output_arg { + name: "parsesingleexample_parsesingleexample" + type: DT_FLOAT + } + output_arg { + name: "toint32_4" + type: DT_INT32 + } + output_arg { + name: "toint32_5" + type: DT_INT32 + } + } + node_def { + name: "ParseSingleExample/Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Const_1" + op: "Const" + device: "/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Const_2" + op: "Const" + device: "/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Const_3" + op: "Const" + device: "/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Const_4" + op: "Const" + device: "/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Const_5" + op: "Const" + device: "/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/Const_6" + op: "Const" + device: "/device:CPU:0" + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + dim { + } + } + } + } + } + } + node_def { + name: "ParseSingleExample/ParseSingleExample" + op: "ParseSingleExample" + input: "arg0" + input: "ParseSingleExample/Const:output:0" + input: "ParseSingleExample/Const_1:output:0" + input: "ParseSingleExample/Const_2:output:0" + input: "ParseSingleExample/Const_3:output:0" + input: "ParseSingleExample/Const_4:output:0" + input: "ParseSingleExample/Const_5:output:0" + input: "ParseSingleExample/Const_6:output:0" + device: "/device:CPU:0" + attr { + key: "Tdense" + value { + list { + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + } + } + } + attr { + key: "dense_keys" + value { + list { + s: "input_ids" + s: "input_mask" + s: "masked_lm_ids" + s: "masked_lm_positions" + s: "masked_lm_weights" + s: "next_sentence_labels" + s: "segment_ids" + } + } + } + attr { + key: "dense_shapes" + value { + list { + shape { + dim { + size: 64 + } + } + shape { + dim { + size: 64 + } + } + shape { + dim { + size: 20 + } + } + shape { + dim { + size: 20 + } + } + shape { + dim { + size: 20 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 64 + } + } + } + } + } + attr { + key: "num_sparse" + value { + i: 0 + } + } + attr { + key: "sparse_keys" + value { + list { + } + } + } + attr { + key: "sparse_types" + value { + list { + } + } + } + } + node_def { + name: "ToInt32" + op: "Cast" + input: "ParseSingleExample/ParseSingleExample:dense_values:0" + device: "/device:CPU:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + } + node_def { + name: "ToInt32_1" + op: "Cast" + input: "ParseSingleExample/ParseSingleExample:dense_values:1" + device: "/device:CPU:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + } + node_def { + name: "ToInt32_2" + op: "Cast" + input: "ParseSingleExample/ParseSingleExample:dense_values:2" + device: "/device:CPU:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + } + node_def { + name: "ToInt32_3" + op: "Cast" + input: "ParseSingleExample/ParseSingleExample:dense_values:3" + device: "/device:CPU:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + } + node_def { + name: "ToInt32_4" + op: "Cast" + input: "ParseSingleExample/ParseSingleExample:dense_values:5" + device: "/device:CPU:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + } + node_def { + name: "ToInt32_5" + op: "Cast" + input: "ParseSingleExample/ParseSingleExample:dense_values:6" + device: "/device:CPU:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_INT64 + } + } + attr { + key: "Truncate" + value { + b: false + } + } + } + ret { + key: "parsesingleexample_parsesingleexample" + value: "ParseSingleExample/ParseSingleExample:dense_values:4" + } + ret { + key: "toint32" + value: "ToInt32:y:0" + } + ret { + key: "toint32_1" + value: "ToInt32_1:y:0" + } + ret { + key: "toint32_2" + value: "ToInt32_2:y:0" + } + ret { + key: "toint32_3" + value: "ToInt32_3:y:0" + } + ret { + key: "toint32_4" + value: "ToInt32_4:y:0" + } + ret { + key: "toint32_5" + value: "ToInt32_5:y:0" + } + attr { + key: "_disable_call_shape_inference" + value { + b: true + } + } + arg_attr { + value { + } + } + } +} +versions { + producer: 134 + min_consumer: 12 +}